import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns
from datetime import date
from datetime import datetime
from scipy import stats
# from scipy.stats import boxcox
from scipy.special import boxcox, inv_boxcox
import math
import start
Initial setup completed.
from helper import *
Helper Imported.
valid_data = pd.read_json (r'validbets.json')
invalid_data = pd.read_json (r'invalidbets.json')
valid_data.head()
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60337ca8cc80710087ff5d8c | 500.000 | BACK | 2021-02-22T09:43:04.686Z | 8776882 | 1.020 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.020 | WINNER_DECLARED | 8776882 |
| 1 | 60337d5dcc80710087ff5d90 | 2100.000 | LAY | 2021-02-22T09:46:05.458Z | 8776882 | 1.010 | 1.180 | 49.36.123.125 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.010 | WINNER_DECLARED | 8776882 |
| 2 | 60337b0a13046300869b42c4 | 25000.000 | LAY | 2021-02-22T09:36:10.372Z | 8776882 | 1.060 | 1.180 | 2405:201:25:d0aa:11b4:2e1c:9999:f32a | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.050 | WINNER_DECLARED | 8776882 |
| 3 | 60337ae4b0517a00b16380e8 | 300.000 | LAY | 2021-02-22T09:35:32.772Z | 8776882 | 1.070 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.070 | WINNER_DECLARED | 8776882 |
| 4 | 60337ab7f343e00049c69416 | 500.000 | BACK | 2021-02-22T09:34:47.648Z | 8776882 | 1.080 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.080 | WINNER_DECLARED | 8776882 |
print("Valid Data Shape : ",valid_data.shape)
print("No. of Data points : ", valid_data.shape[0])
print("No. of Fetures : ", valid_data.shape[1]-1)
Valid Data Shape : (10000, 16) No. of Data points : 10000 No. of Fetures : 15
invalid_data.head(3)
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5fa47215305f1f00f02b5ae4 | 783758 | LAY | 2020-11-05T21:43:49.222Z | 127991 | 160.000 | 1.175 | 185.105.2.158 | Soccer | contra11 | AC Milan | Match Odds | AC Milan v Lille | 160.000 | INVALID_BET | 44790 |
| 1 | 5fa014d9d1a79f00d41349df | 4500 | BACK | 2020-11-02T14:16:57.494Z | 22121561 | 1.960 | 1.175 | 106.204.14.80 | Cricket | sirsa3 | Delhi Capitals | Match Odds | Delhi Capitals v Royal Challengers Bangalore | 1.970 | INVALID_BET | 22121561 |
| 2 | 5fa014db5a7a0a00e2868190 | 7000 | BACK | 2020-11-02T14:16:59.763Z | 22121561 | 1.960 | 1.175 | 185.203.122.18 | Cricket | bhush001 | Delhi Capitals | Match Odds | Delhi Capitals v Royal Challengers Bangalore | 1.960 | INVALID_BET | 22121561 |
print("Invalid Data Shape : ",invalid_data.shape)
print("No. of Data points : ", invalid_data.shape[0])
print("No. of Fetures : ", invalid_data.shape[1]-1)
Invalid Data Shape : (66, 16) No. of Data points : 66 No. of Fetures : 15
merged_data = pd.concat([valid_data,invalid_data])
merged_data.head(3)
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60337ca8cc80710087ff5d8c | 500.000 | BACK | 2021-02-22T09:43:04.686Z | 8776882 | 1.020 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.020 | WINNER_DECLARED | 8776882 |
| 1 | 60337d5dcc80710087ff5d90 | 2100.000 | LAY | 2021-02-22T09:46:05.458Z | 8776882 | 1.010 | 1.180 | 49.36.123.125 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.010 | WINNER_DECLARED | 8776882 |
| 2 | 60337b0a13046300869b42c4 | 25000.000 | LAY | 2021-02-22T09:36:10.372Z | 8776882 | 1.060 | 1.180 | 2405:201:25:d0aa:11b4:2e1c:9999:f32a | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.050 | WINNER_DECLARED | 8776882 |
print("Data Shape : ",merged_data.shape)
print("No. of Data points : ", merged_data.shape[0])
print("No. of Fetures : ", merged_data.shape[1]-1)
Data Shape : (10066, 16) No. of Data points : 10066 No. of Fetures : 15
heading('Backup copy of Data')
original_data = merged_data.copy()
print("Data Shape : ",original_data.shape)
print("No. of Data points : ", original_data.shape[0])
print("No. of Fetures : ", original_data.shape[1]-1)
------------------- Backup copy of Data ------------------- Data Shape : (10066, 16) No. of Data points : 10066 No. of Fetures : 15
merged_data.to_csv('merge_data.csv', index=False)
valid_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10000 entries, 0 to 9999 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 _id 10000 non-null object 1 stake 10000 non-null float64 2 type 10000 non-null object 3 placedDate 10000 non-null object 4 horse 10000 non-null int64 5 betRate 10000 non-null float64 6 marketId 10000 non-null float64 7 IP 9998 non-null object 8 eventType 10000 non-null object 9 userName 10000 non-null object 10 selectionName 10000 non-null object 11 marketName 10000 non-null object 12 event 10000 non-null object 13 averagePriceMatched 10000 non-null float64 14 status 10000 non-null object 15 winnerId 10000 non-null int64 dtypes: float64(4), int64(2), object(10) memory usage: 1.2+ MB
Observation :
invalid_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 66 entries, 0 to 65 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 _id 66 non-null object 1 stake 66 non-null int64 2 type 66 non-null object 3 placedDate 66 non-null object 4 horse 66 non-null int64 5 betRate 66 non-null float64 6 marketId 66 non-null float64 7 IP 65 non-null object 8 eventType 66 non-null object 9 userName 66 non-null object 10 selectionName 66 non-null object 11 marketName 66 non-null object 12 event 66 non-null object 13 averagePriceMatched 66 non-null float64 14 status 66 non-null object 15 winnerId 66 non-null int64 dtypes: float64(3), int64(3), object(10) memory usage: 8.4+ KB
Observation :
describe(valid_data).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| stake | 10000.000 | 31411.858 | 145336.923 | 9.000 | 367.500 | 2000.000 | 20000.000 | 9300000.000 | 43740.049 | 31.645 | 1747.867 |
| horse | 10000.000 | 4998336.836 | 4887547.417 | 448.000 | 1222344.000 | 4294272.000 | 9630879.000 | 25215583.000 | 3936271.743 | 1.092 | 0.892 |
| betRate | 10000.000 | 2.433 | 13.808 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.920 | 45.182 | 2892.427 |
| marketId | 10000.000 | 1.179 | 0.001 | 1.178 | 1.179 | 1.179 | 1.180 | 1.180 | 0.001 | -1.144 | -0.240 |
| averagePriceMatched | 10000.000 | 2.446 | 13.922 | 1.010 | 1.180 | 1.400 | 1.660 | 1000.000 | 1.945 | 44.474 | 2808.271 |
| winnerId | 10000.000 | 4785643.149 | 4693919.515 | 448.000 | 1222344.000 | 4294272.000 | 9630879.000 | 24301731.000 | 3850048.670 | 0.997 | 0.393 |
Observation :
# invalid_data.describe().T
describe(invalid_data).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| stake | 66.000 | 76803.606 | 176754.391 | 100.000 | 4125.000 | 9750.000 | 17657.500 | 783758.000 | 110270.253 | 2.768 | 6.812 |
| horse | 66.000 | 3861662.258 | 7916572.561 | 235.000 | 11411.250 | 86359.000 | 4294272.000 | 36846168.000 | 5163178.950 | 2.867 | 8.511 |
| betRate | 66.000 | 5.463 | 19.664 | 1.020 | 1.492 | 1.980 | 4.000 | 160.000 | 5.507 | 7.727 | 61.263 |
| marketId | 66.000 | 1.178 | 0.002 | 1.175 | 1.176 | 1.179 | 1.179 | 1.179 | 0.001 | -0.946 | -0.538 |
| averagePriceMatched | 66.000 | 5.599 | 19.678 | 1.020 | 1.520 | 2.050 | 4.000 | 160.000 | 5.682 | 7.689 | 60.850 |
| winnerId | 66.000 | 3297048.697 | 6904352.770 | 235.000 | 7461.000 | 58805.000 | 2062982.000 | 36846168.000 | 4581190.086 | 2.906 | 9.411 |
Observation :
merged_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 10066 entries, 0 to 65 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 _id 10066 non-null object 1 stake 10066 non-null float64 2 type 10066 non-null object 3 placedDate 10066 non-null object 4 horse 10066 non-null int64 5 betRate 10066 non-null float64 6 marketId 10066 non-null float64 7 IP 10063 non-null object 8 eventType 10066 non-null object 9 userName 10066 non-null object 10 selectionName 10066 non-null object 11 marketName 10066 non-null object 12 event 10066 non-null object 13 averagePriceMatched 10066 non-null float64 14 status 10066 non-null object 15 winnerId 10066 non-null int64 dtypes: float64(4), int64(2), object(10) memory usage: 1.3+ MB
describe(merged_data).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| stake | 10066.000 | 31709.479 | 145600.465 | 9.000 | 400.000 | 2000.000 | 20000.000 | 9300000.000 | 44194.691 | 31.301 | 1723.736 |
| horse | 10066.000 | 4990883.972 | 4913718.664 | 235.000 | 1222344.000 | 4294272.000 | 9628997.250 | 36846168.000 | 3945296.031 | 1.136 | 1.190 |
| betRate | 10066.000 | 2.452 | 13.855 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.951 | 44.571 | 2835.808 |
| marketId | 10066.000 | 1.179 | 0.001 | 1.175 | 1.179 | 1.179 | 1.180 | 1.180 | 0.001 | -1.300 | 0.975 |
| averagePriceMatched | 10066.000 | 2.466 | 13.969 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.977 | 43.882 | 2754.245 |
| winnerId | 10066.000 | 4775882.844 | 4712822.120 | 235.000 | 1221386.000 | 4294272.000 | 9630879.000 | 36846168.000 | 3856511.193 | 1.029 | 0.602 |
Observation :
merged_data['status'].value_counts()
WINNER_DECLARED 10000 INVALID_BET 66 Name: status, dtype: int64
def pie_labeling(x):
print(x)
return '{:.4f}%\n(#{:.0f})'.format(x, sums.values.sum()*x/100)
from matplotlib.pyplot import pie, axis, show
sums = merged_data['status'].value_counts()
axis('equal')
pie(sums.values, labels=sums.index, autopct=pie_labeling, pctdistance=1.3, labeldistance=1.6)
plt.title("All Data - Dependent Variable ('status') distribution")
plt.show()
99.3443250656128 0.6556725595146418
Observation :
merged_data.isnull().values.any()
True
# null_col = merged_data.columns[merged_data.isnull().any()].tolist()
# merged_data[null_col].isnull().sum()
# merged_data.isnull().sum()
missing_values_table(merged_data)
Your selected dataframe has 16 columns. There are 1 columns that have missing values.
| Missing Values | % of Total Values | |
|---|---|---|
| IP | 3 | 0.000 |
Observation :
heading("Number of unique values in each feature")
print(merged_data.nunique())
--------------------------------------- Number of unique values in each feature --------------------------------------- _id 10066 stake 376 type 2 placedDate 10063 horse 319 betRate 258 marketId 390 IP 1070 eventType 3 userName 410 selectionName 318 marketName 10 event 245 averagePriceMatched 726 status 2 winnerId 195 dtype: int64
categorical_list = ['type', 'horse', 'marketId', 'IP', 'eventType', 'userName', 'selectionName', 'marketName', 'event','winnerId']
numerical_list = ['stake', 'betRate', 'averagePriceMatched']
datetime_list = ['placedDate']
merged_data[numerical_list].nunique()
stake 376 horse 319 betRate 258 averagePriceMatched 726 dtype: int64
merged_data[categorical_list].nunique()
type 2 marketId 390 IP 1070 eventType 3 userName 410 selectionName 318 marketName 10 event 245 winnerId 195 dtype: int64
merged_data[datetime_list].nunique()
placedDate 10063 dtype: int64
merged_data['_id'].nunique()
10066
Observation :
heading("Categorical Features")
print('\n'.join(categorical_list))
-------------------- Categorical Features -------------------- type marketId IP eventType userName selectionName marketName event winnerId
# change_dtype_cat(merged_data, 'type', False)
# merged_data['type'].dtype
merged_data['type'].value_counts()
LAY 5303 BACK 4763 Name: type, dtype: int64
count_plot(merged_data, 'type', rotation=0, size=(6,5))
crosstab_by_y_plot(merged_data, 'type', figsize=(6,5))
---------------------------- type grouped by status Count ----------------------------
| status | INVALID_BET | WINNER_DECLARED |
|---|---|---|
| type | ||
| BACK | 35 | 4728 |
| LAY | 31 | 5272 |
Observation :
# describe(merged_data['horse'])
merged_data['horse'].nunique()
319
count_plot(merged_data, 'horse')
# merged_data['horse'].value_counts()
# histplot(merged_data, 'horse', hue='status', kde=True)
# sns.histplot(np.log(merged_data['horse']))
crosstab_by_y(merged_data, 'horse', transposed=True)
----------------------------- horse grouped by status Count -----------------------------
| horse | 235 | 448 | 1096 | 1117 | 1189 | 1703 | 2426 | 2685 | 7407 | 7461 | 7659 | 9162 | 9163 | 10501 | 10761 | 10774 | 10779 | 13360 | 13362 | 14072 | 16606 | 28191 | 28214 | 28220 | 28223 | 37302 | 37303 | 41433 | 44503 | 44504 | 44507 | 44508 | 44518 | 44519 | 44521 | 44526 | 44785 | 44787 | 44790 | 44793 | 44794 | 44795 | 44796 | 44797 | 44798 | 44800 | 46726 | 47972 | 47973 | 47998 | 47999 | 48043 | 48044 | 48224 | 48351 | 48451 | 48461 | 48470 | 48756 | 48759 | 48783 | 48784 | 48785 | 48786 | 48787 | 48793 | 48799 | 49058 | 50347 | 50349 | 51404 | 55190 | 55223 | 55243 | 55264 | 55270 | 55271 | 56036 | 56298 | 56299 | 56301 | 56323 | 56343 | 56363 | 56764 | 56966 | 56967 | 58805 | 58943 | 59044 | 60294 | 60295 | 60297 | 60303 | 60310 | 60443 | 62683 | 62684 | 63347 | 64374 | 64964 | 65352 | 65778 | 66183 | 67143 | 69718 | 69720 | 70385 | 70468 | 77586 | 78864 | 79323 | 79343 | 84649 | 86359 | 113123 | 113125 | 113187 | 113191 | 113239 | 121724 | 127991 | 191604 | 191607 | 198124 | 198136 | 198138 | 199184 | 199545 | 201261 | 201327 | 208035 | 214865 | 215817 | 215821 | 215829 | 247969 | 259394 | 269792 | 298233 | 309111 | 309687 | 309689 | 347774 | 350594 | 361329 | 361706 | 419126 | 476499 | 482032 | 489720 | 495321 | 498560 | 501200 | 505726 | 508773 | 522046 | 522049 | 522054 | 571273 | 674742 | 676464 | 676465 | 676467 | 924268 | 965417 | 968185 | 1029663 | 1088499 | 1205121 | 1205126 | 1221385 | 1221386 | 1222344 | 1222345 | 1222346 | 1222347 | 1254317 | 1485567 | 1485568 | 1485573 | 1557297 | 2009654 | 2013140 | 2047448 | 2080735 | 2081063 | 2249229 | 2250259 | 2250353 | 2255452 | 2257536 | 2263603 | 2263634 | 2312313 | 2312315 | 2469649 | 2487036 | 2506293 | 2542448 | 2542449 | 2810072 | 3158851 | 3186303 | 3237590 | 3258153 | 3630179 | 3691700 | 3809606 | 3954225 | 4294272 | 4294273 | 4297012 | 4638399 | 4729711 | 4822931 | 4855758 | 4859354 | 4864974 | 4943786 | 5045297 | 5071877 | 5168454 | 5304142 | 5340398 | 5626816 | 5774350 | 5851482 | 5851483 | 5875376 | 6480414 | 6516913 | 6555433 | 6847357 | 7414058 | 7418999 | 7445660 | 7594131 | 7640637 | 7659748 | 7671296 | 7797904 | 7928242 | 8173434 | 8196374 | 8226987 | 8243874 | 8257797 | 8258569 | 8284479 | 8326752 | 8443097 | 8444055 | 8587663 | 8698678 | 8700174 | 8750569 | 8776882 | 8781581 | 8784966 | 8838645 | 8842202 | 8842295 | 8859238 | 8908192 | 9128710 | 9193006 | 9198585 | 9220660 | 9624573 | 9630472 | 9630879 | 9631399 | 9631561 | 9632088 | 9635566 | 10071088 | 10372226 | 10460263 | 10472882 | 10513040 | 10756275 | 10782588 | 10782589 | 10782634 | 10782635 | 10791812 | 10885505 | 10943306 | 11204210 | 11285506 | 11313212 | 11510005 | 11772759 | 11881072 | 12686963 | 12742426 | 12819357 | 13052998 | 13441259 | 13659467 | 13734279 | 13834991 | 16081872 | 16149511 | 16149526 | 17162689 | 17980099 | 19249131 | 19924824 | 19924825 | 19924831 | 19924941 | 21067365 | 22121561 | 24301731 | 25215583 | 36700739 | 36846168 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| status | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| INVALID_BET | 13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 1 |
| WINNER_DECLARED | 0 | 343 | 1 | 1 | 1 | 2 | 6 | 2 | 1 | 0 | 0 | 10 | 2 | 4 | 9 | 12 | 5 | 6 | 0 | 12 | 404 | 18 | 1 | 8 | 4 | 70 | 189 | 7 | 11 | 5 | 7 | 0 | 1 | 7 | 8 | 1 | 1 | 1 | 7 | 1 | 3 | 3 | 1 | 5 | 5 | 2 | 1 | 148 | 163 | 6 | 15 | 4 | 14 | 9 | 15 | 4 | 6 | 1 | 13 | 1 | 4 | 1 | 0 | 2 | 1 | 1 | 3 | 2 | 4 | 2 | 1 | 17 | 13 | 9 | 9 | 6 | 4 | 3 | 1 | 12 | 10 | 13 | 7 | 2 | 13 | 10 | 17 | 267 | 3 | 5 | 9 | 4 | 8 | 5 | 3 | 0 | 2 | 1 | 8 | 6 | 1 | 1 | 1 | 1 | 10 | 1 | 5 | 1 | 3 | 2 | 4 | 2 | 2 | 1 | 0 | 1 | 16 | 1 | 1 | 5 | 4 | 0 | 2 | 11 | 1 | 5 | 10 | 4 | 13 | 1 | 1 | 6 | 3 | 21 | 1 | 3 | 1 | 2 | 1 | 3 | 2 | 5 | 3 | 4 | 2 | 10 | 4 | 6 | 11 | 1 | 3 | 1 | 3 | 18 | 2 | 0 | 1 | 1 | 1 | 1 | 2 | 2 | 12 | 23 | 8 | 1 | 2 | 2 | 3 | 3 | 2 | 23 | 38 | 95 | 35 | 2 | 27 | 717 | 17 | 2 | 0 | 5 | 284 | 8 | 2 | 4 | 3 | 171 | 4 | 16 | 1 | 1 | 6 | 8 | 116 | 187 | 1 | 9 | 4 | 1 | 1 | 273 | 5 | 3 | 3 | 3 | 1 | 1 | 120 | 3 | 573 | 1228 | 19 | 1 | 9 | 8 | 7 | 369 | 1 | 17 | 17 | 77 | 1 | 1 | 8 | 50 | 2 | 13 | 49 | 5 | 3 | 4 | 0 | 0 | 18 | 1 | 1 | 5 | 114 | 0 | 0 | 29 | 16 | 13 | 4 | 0 | 73 | 17 | 7 | 2 | 16 | 15 | 1 | 3 | 5 | 6 | 2 | 22 | 2 | 2 | 5 | 7 | 7 | 9 | 9 | 15 | 7 | 11 | 12 | 5 | 2 | 15 | 11 | 2 | 9 | 3 | 24 | 9 | 8 | 1 | 5 | 1 | 8 | 751 | 531 | 412 | 7 | 6 | 1 | 23 | 36 | 24 | 10 | 22 | 38 | 26 | 64 | 2 | 2 | 1 | 1 | 6 | 62 | 0 | 1 | 56 | 138 | 11 | 9 | 16 | 5 | 98 | 1 | 18 | 0 | 4 | 31 | 0 | 0 |
crosstab_by_y_table(merged_data, 'horse')
----------------------------------------------- Top 10 horse by WINNER_DECLARED and INVALID_BET -----------------------------------------------
| status | WINNER_DECLARED |
|---|---|
| horse | |
| 4294273 | 1228 |
| 10782589 | 751 |
| 1254317 | 717 |
| 4294272 | 573 |
| 10782634 | 531 |
| 10782635 | 412 |
| 16606 | 404 |
| 4859354 | 369 |
| 448 | 343 |
| 2009654 | 284 |
| status | INVALID_BET |
|---|---|
| horse | |
| 235 | 13 |
| 1221386 | 7 |
| 8226987 | 3 |
| 7671296 | 3 |
| 58805 | 3 |
| 60443 | 3 |
| 22121561 | 3 |
| 86359 | 3 |
| 47973 | 2 |
| 7461 | 2 |
# # now stack and reset
# stacked = prob.stack().reset_index().rename(columns={0:'value'})
# stacked
# merged_data['marketId']
# change_dtype_cat(merged_data, 'marketId')
merged_data['marketId'].nunique()
390
count_plot(merged_data, 'marketId')
crosstab_by_y_table(merged_data, 'marketId')
-------------------------------------------------- Top 10 marketId by WINNER_DECLARED and INVALID_BET --------------------------------------------------
| status | WINNER_DECLARED |
|---|---|
| marketId | |
| 1.178 | 889 |
| 1.179 | 747 |
| 1.179 | 587 |
| 1.180 | 557 |
| 1.179 | 538 |
| 1.178 | 538 |
| 1.180 | 470 |
| 1.178 | 420 |
| 1.180 | 416 |
| 1.179 | 398 |
| status | INVALID_BET |
|---|---|
| marketId | |
| 1.179 | 14 |
| 1.179 | 5 |
| 1.176 | 5 |
| 1.179 | 3 |
| 1.175 | 3 |
| 1.179 | 3 |
| 1.175 | 3 |
| 1.176 | 2 |
| 1.179 | 2 |
| 1.176 | 2 |
Observation :
describe(merged_data['marketId'])
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| marketId | 10066.000 | 1.179 | 0.001 | 1.175 | 1.179 | 1.179 | 1.180 | 1.180 | 0.001 | -1.300 | 0.975 |
box_violin_plot(merged_data, 'marketId')
histplot(merged_data, 'marketId')
plt.figure(figsize=(12,6))
sns.boxplot(x ='Date', y ='marketId', data = merged_data, hue ='status')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()
merged_data['IP']
0 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8
1 49.36.123.125
2 2405:201:25:d0aa:11b4:2e1c:9999:f32a
3 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8
4 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8
...
61 106.207.179.134
62 103.200.84.188
63 103.212.156.208
64 124.253.0.211
65 2001:8f8:1a63:d2a1:ac87:4f9c:69c9:509e
Name: IP, Length: 10066, dtype: object
merged_data['IP'].nunique()
1070
count_plot(merged_data, 'IP')
merged_data['IP_version'] = merged_data['IP'].apply(validIPAddress)
merged_data['IP_version'].unique()
array(['IPv6', 'IPv4', 'Invalid'], dtype=object)
crosstab_by_y_plot(merged_data, 'IP_version', figsize=(6,5))
---------------------------------- IP_version grouped by status Count ----------------------------------
| status | INVALID_BET | WINNER_DECLARED |
|---|---|---|
| IP_version | ||
| IPv4 | 52 | 6418 |
| IPv6 | 13 | 3577 |
| Invalid | 1 | 5 |
map_data = pd.read_csv('ip_Details_splitted.csv')
map_data.head(3)
| IP | Details | city | region | country | loc | org | postal | timezone | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 182.64.30.119 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS24560 Bharti Airtel Ltd., Telemedia Services', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS24560 Bharti Airtel Ltd., Telemedia Services | 110001 | Asia/Kolkata |
| 1 | 139.5.236.244 | ['Mumbai', 'Maharashtra', 'IN', '19.0728,72.8826', 'AS136334 Vortex Netsol Private Limited', '40... | Mumbai | Maharashtra | IN | 19.0728,72.8826 | AS136334 Vortex Netsol Private Limited | 400070 | Asia/Kolkata |
| 2 | 49.36.123.125 | ['Mumbai', 'Maharashtra', 'IN', '19.0728,72.8826', 'AS55836 Reliance Jio Infocomm Limited', '400... | Mumbai | Maharashtra | IN | 19.0728,72.8826 | AS55836 Reliance Jio Infocomm Limited | 400070 | Asia/Kolkata |
# Ip_data = pd.merge(merged_data, map_data, left_on= ['IP'],
# right_on= ['IP'],
# how = 'left')
# Ip_data.info()
# Ip_data.head(3)
merged_data = pd.merge(merged_data, map_data, left_on= ['IP'],
right_on= ['IP'],
how = 'left')
merged_data.head(3)
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | IP_version | Details | city | region | country | loc | org | postal | timezone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60337ca8cc80710087ff5d8c | 500.000 | BACK | 2021-02-22T09:43:04.686Z | 8776882 | 1.020 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.020 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata |
| 1 | 60337d5dcc80710087ff5d90 | 2100.000 | LAY | 2021-02-22T09:46:05.458Z | 8776882 | 1.010 | 1.180 | 49.36.123.125 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.010 | WINNER_DECLARED | 8776882 | IPv4 | ['Mumbai', 'Maharashtra', 'IN', '19.0728,72.8826', 'AS55836 Reliance Jio Infocomm Limited', '400... | Mumbai | Maharashtra | IN | 19.0728,72.8826 | AS55836 Reliance Jio Infocomm Limited | 400070 | Asia/Kolkata |
| 2 | 60337b0a13046300869b42c4 | 25000.000 | LAY | 2021-02-22T09:36:10.372Z | 8776882 | 1.060 | 1.180 | 2405:201:25:d0aa:11b4:2e1c:9999:f32a | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.050 | WINNER_DECLARED | 8776882 | IPv6 | ['Airoli', 'Maharashtra', 'IN', '19.1167,72.9833', 'AS55836 Reliance Jio Infocomm Limited', '400... | Airoli | Maharashtra | IN | 19.1167,72.9833 | AS55836 Reliance Jio Infocomm Limited | 400701 | Asia/Kolkata |
lat_lon_df = merged_data["loc"].str.split(",", n = 1, expand = True)
lat_lon_df.columns = ['Latitude', 'Longitude']
# lat_lon_df
merged_data = pd.concat([merged_data, lat_lon_df], axis=1)
merged_data.head(3)
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | IP_version | Details | city | region | country | loc | org | postal | timezone | stake_log | stake_boxcox | betRate_log | betRate_boxcox | averagePriceMatched_log | averagePriceMatched_boxcox | Date | time | hour | week | weekday | day_name | Latitude | Longitude | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60337ca8cc80710087ff5d8c | 500.000 | BACK | 2021-02-22 09:43:04.686 | 8776882 | 1.020 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.020 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata | 6.215 | 5.397 | 0.020 | 0.019 | 0.020 | 0.019 | 2021-02-22 | 09:43:04.686000 | 9 | 8 | 0 | Monday | 28.6519 | 77.2315 |
| 1 | 60337d5dcc80710087ff5d90 | 2100.000 | LAY | 2021-02-22 09:46:05.458 | 8776882 | 1.010 | 1.180 | 49.36.123.125 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.010 | WINNER_DECLARED | 8776882 | IPv4 | ['Mumbai', 'Maharashtra', 'IN', '19.0728,72.8826', 'AS55836 Reliance Jio Infocomm Limited', '400... | Mumbai | Maharashtra | IN | 19.0728,72.8826 | AS55836 Reliance Jio Infocomm Limited | 400070 | Asia/Kolkata | 7.650 | 6.434 | 0.010 | 0.010 | 0.010 | 0.010 | 2021-02-22 | 09:46:05.458000 | 9 | 8 | 0 | Monday | 19.0728 | 72.8826 |
| 2 | 60337b0a13046300869b42c4 | 25000.000 | LAY | 2021-02-22 09:36:10.372 | 8776882 | 1.060 | 1.180 | 2405:201:25:d0aa:11b4:2e1c:9999:f32a | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.050 | WINNER_DECLARED | 8776882 | IPv6 | ['Airoli', 'Maharashtra', 'IN', '19.1167,72.9833', 'AS55836 Reliance Jio Infocomm Limited', '400... | Airoli | Maharashtra | IN | 19.1167,72.9833 | AS55836 Reliance Jio Infocomm Limited | 400701 | Asia/Kolkata | 10.127 | 8.071 | 0.058 | 0.056 | 0.049 | 0.047 | 2021-02-22 | 09:36:10.372000 | 9 | 8 | 0 | Monday | 19.1167 | 72.9833 |
missing_values_table(merged_data)
Dataframe has 39 columns. There are 11 columns that have missing values.
| Missing Values | % of Total Values | |
|---|---|---|
| postal | 714 | 7.100 |
| Details | 6 | 0.100 |
| city | 6 | 0.100 |
| region | 6 | 0.100 |
| country | 6 | 0.100 |
| loc | 6 | 0.100 |
| org | 6 | 0.100 |
| timezone | 6 | 0.100 |
| Latitude | 6 | 0.100 |
| Longitude | 6 | 0.100 |
| IP | 3 | 0.000 |
# missing_values_rows(merged_data, rows=3)
for i in ['IP_version', 'city', 'region', 'country', 'loc', 'org', 'postal', 'timezone']:
if merged_data[i].nunique() < 10:
count_plot(merged_data, i, size=(6,5))
else:
count_plot(merged_data, i )
# import folium
# IP_data[IP_data['Latitude'].isnull()].dropna()
# IP_data[IP_data['Longitude'].isnull()]
# for lat, lon in zip(IP_data['Latitude'].dropna().values, IP_data['Longitude'].dropna().values):
# m = folium.Map( location=[ lat, lon] )
# m=folium.Map(location=[28.644800, 77.216721], zoom_start=5)
# for lat, lon in zip(IP_data['Latitude'].dropna().values, IP_data['Longitude'].dropna().values):
# folium.Map( location=[ lat, lon] ).add_to(m)
# m
# from branca.element import Figure
# fig=Figure(width=550,height=350)
# m1=folium.Map(width=550,height=350,location=[28.644800, 77.216721],zoom_start=11,min_zoom=8,max_zoom=14)
# fig.add_child(m1)
# m1
# IP_data['Longitude'] = IP_data['loc'].str.split(',').str[0].values
# IP_data['Latitude'] = IP_data['loc'].str.split(',').str[1].values
# IP_data.head()
# Maharashtra_city_long = IP_data[IP_data["region"] == "Maharashtra"].Longitude.values
# Maharashtra_city_lat = IP_data[IP_data["region"] == "Maharashtra"].Latitude.values
# m = folium.Map(location=[Maharashtra_city_long, Maharashtra_city_lat])
# m
# IP_data[IP_data["region"] == "Maharashtra"].Longitude.values
# india_geojson=geopandas.read_file('india.json')
# india_geojson.head()
display_image('world.png', width=800)
display_image('ip_india.png', width=800)
merged_data['eventType'].value_counts()
Cricket 6788 Soccer 1885 Tennis 1393 Name: eventType, dtype: int64
# change_dtype_cat(merged_data, 'eventType')
count_plot(merged_data, 'eventType', size=(6,5))
crosstab_by_y_plot(merged_data, 'eventType', figsize=(6,5))
--------------------------------- eventType grouped by status Count ---------------------------------
| status | INVALID_BET | WINNER_DECLARED |
|---|---|---|
| eventType | ||
| Cricket | 32 | 6756 |
| Soccer | 30 | 1855 |
| Tennis | 4 | 1389 |
merged_data['userName'].nunique()
410
# merged_data['userName'].value_counts()
count_plot(merged_data, 'userName')
# crosstab_by_y_plot(merged_data, 'userName')
crosstab_by_y_table(merged_data,'userName', top=10)
-------------------------------------------------- Top 10 userName by WINNER_DECLARED and INVALID_BET --------------------------------------------------
| status | WINNER_DECLARED |
|---|---|
| userName | |
| soni111 | 489 |
| ab15 | 424 |
| aksash111 | 424 |
| angel18 | 307 |
| drgplay12 | 182 |
| mp111 | 177 |
| rock115 | 163 |
| rk50 | 156 |
| anikakan0101 | 155 |
| springplay12 | 155 |
| status | INVALID_BET |
|---|---|
| userName | |
| broraj1 | 3 |
| jk100p | 3 |
| ludo02 | 3 |
| ashu61 | 3 |
| chomo111 | 2 |
| soni111 | 2 |
| bhush001 | 2 |
| sony453 | 2 |
| raja90 | 2 |
| ri786 | 2 |
word_list = []
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
for val in merged_data.userName:
val = str(val).strip().replace(" ", "_")
word_list.append(val)
# Converts each token into lowercase
for i in range(len(word_list)):
word_list[i] = word_list[i].lower()
wc = ' '.join(str(e) for e in word_list)
wordcloud = WordCloud(width = 800, height = 800,
background_color ='black',
min_font_size = 10).generate(wc)
# plot the WordCloud image
plt.figure(figsize = (6, 6), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)
plt.title("WordCloud for UserName")
plt.show()
merged_data.selectionName.nunique()
318
count_plot(merged_data, 'selectionName')
crosstab_by_y_table(merged_data, 'selectionName', top=20)
------------------------------------------------------- Top 20 selectionName by WINNER_DECLARED and INVALID_BET -------------------------------------------------------
| status | WINNER_DECLARED |
|---|---|
| selectionName | |
| Dolphins | 1228 |
| Islamabad United | 751 |
| Titans | 717 |
| Lions | 573 |
| Lahore Qalandars | 531 |
| Karachi Kings | 412 |
| Australia | 404 |
| Leeward Islands | 369 |
| New Zealand | 343 |
| Guyana | 284 |
| Trinidad & Tobago | 273 |
| The Draw | 267 |
| No | 189 |
| Barbados | 187 |
| Novak Djokovic | 171 |
| Over 2.5 Goals | 163 |
| Under 2.5 Goals | 148 |
| Multan Sultans | 138 |
| Cape Cobras | 120 |
| Jamaica | 116 |
| status | INVALID_BET |
|---|---|
| selectionName | |
| West Indies | 13 |
| Over 1.5 Goals | 7 |
| The Draw | 6 |
| Canterbury | 3 |
| Hugo Dellien | 3 |
| Sunrisers Hyderabad | 3 |
| Delhi Capitals | 3 |
| Werder Bremen | 2 |
| Pakistan | 2 |
| Over 2.5 Goals | 2 |
| Lions | 2 |
| Northern Knights | 1 |
| Sivasspor | 1 |
| Northeast United | 1 |
| Boavista | 1 |
| Over 7.5 Goals | 1 |
| Granada | 1 |
| Under 2.5 Goals | 1 |
| Reims | 1 |
| Sociedad | 1 |
word_list = []
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import re
for val in merged_data.selectionName:
val = str(val).strip().replace(" ", "_")
val = re.sub(r'\s+', '',val)
word_list.append(val)
# Converts each token into lowercase
for i in range(len(word_list)):
word_list[i] = word_list[i].lower()
wc = ' '.join(str(e) for e in word_list)
wordcloud = WordCloud(width = 800, height = 800,
background_color ='black',
min_font_size = 10).generate(wc)
# plot the WordCloud image
plt.figure(figsize = (8, 6), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)
plt.show()
merged_data['marketName'].unique()
array(['Match Odds', 'Tied Match', 'Over/Under 2.5 Goals',
'Over/Under 3.5 Goals', 'Over/Under 1.5 Goals',
'Over/Under 4.5 Goals', 'Over/Under 5.5 Goals',
'Over/Under 0.5 Goals', 'Over/Under 6.5 Goals',
'Over/Under 7.5 Goals'], dtype=object)
count_plot(merged_data, 'marketName', rotation=20)
# crosstab_by_y_table(merged_data, 'marketName', top=None)
crosstab_by_y_plot(merged_data, 'marketName', stacked=False, rotation=90)
---------------------------------- marketName grouped by status Count ----------------------------------
| status | INVALID_BET | WINNER_DECLARED |
|---|---|---|
| marketName | ||
| Match Odds | 55 | 9127 |
| Over/Under 0.5 Goals | 0 | 62 |
| Over/Under 1.5 Goals | 7 | 61 |
| Over/Under 2.5 Goals | 3 | 311 |
| Over/Under 3.5 Goals | 0 | 130 |
| Over/Under 4.5 Goals | 0 | 29 |
| Over/Under 5.5 Goals | 0 | 19 |
| Over/Under 6.5 Goals | 0 | 2 |
| Over/Under 7.5 Goals | 1 | 0 |
| Tied Match | 0 | 259 |
merged_data['event'].nunique()
245
count_plot(merged_data, 'event')
crosstab_by_y_table(merged_data, 'event')
----------------------------------------------- Top 10 event by WINNER_DECLARED and INVALID_BET -----------------------------------------------
| status | WINNER_DECLARED |
|---|---|
| event | |
| Islamabad United v Multan Sultans | 967 |
| New Zealand v Australia (1st T20) | 766 |
| Lions v Warriors | 612 |
| Lahore Qalandars v Peshawar Zalmi | 561 |
| Guyana v Trinidad & Tobago | 557 |
| Dolphins v Cape Cobras | 553 |
| Leeward Islands v Jamaica | 470 |
| Warriors v Dolphins | 464 |
| Karachi Kings v Quetta Gladiators | 427 |
| Titans v Knights | 412 |
| status | INVALID_BET |
|---|---|
| event | |
| Bangladesh v West Indies | 14 |
| Pakistan v South Africa | 5 |
| Hapoel Beer Sheva v Nice | 5 |
| Delhi Capitals v Royal Challengers Bangalore | 3 |
| Sunrisers Hyderabad v Mumbai Indians | 3 |
| Central Districts v Canterbury | 3 |
| Casanova v Dellien | 3 |
| US Cremonese v Brescia | 2 |
| Lions v Warriors | 2 |
| Werder Bremen v Schalke 04 | 2 |
merged_data['winnerId'].nunique()
195
count_plot(merged_data, 'winnerId')
crosstab_by_y_table(merged_data, 'winnerId')
-------------------------------------------------- Top 10 winnerId by WINNER_DECLARED and INVALID_BET --------------------------------------------------
| status | WINNER_DECLARED |
|---|---|
| winnerId | |
| 4294273 | 1329 |
| 10782589 | 889 |
| 1254317 | 758 |
| 448 | 747 |
| 2312313 | 672 |
| 4294272 | 587 |
| 2810072 | 557 |
| 10782634 | 538 |
| 58805 | 427 |
| 10782635 | 420 |
| status | INVALID_BET |
|---|---|
| winnerId | |
| 235 | 14 |
| 58805 | 9 |
| 7461 | 5 |
| 1221385 | 5 |
| 22121561 | 3 |
| 86363 | 3 |
| 8226987 | 3 |
| 7671296 | 3 |
| 4294272 | 2 |
| 1221386 | 2 |
sns.heatmap(merged_data.corr(), vmax=.8, square=True, cmap="BuPu");
describe(merged_data['stake'])
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| stake | 10066.000 | 31709.479 | 145600.465 | 9.000 | 400.000 | 2000.000 | 20000.000 | 9300000.000 | 44194.691 | 31.301 | 1723.736 |
pdf_cdf(merged_data, 'stake', bins=20)
box_violin_plot(merged_data, 'stake')
# %%time
# sns.displot(data=merged_data, x='stake', hue='status', kde=True)
%%time
sns.histplot(data=merged_data, x='stake', hue='status', kde=True).set_title("Histogram - stake")
plt.show()
Wall time: 34.3 s
count_plot(merged_data, 'stake', top=15, rotation=0)
stake_count = merged_data['stake'].value_counts()
stake_count_idx = merged_data['stake'].value_counts().sort_index()
# stake_count_df = pd.DataFrame({'value':stake_count.index, 'count':stake_count.values})
# stake_count_idx_df = pd.DataFrame({'value':stake_count_idx.index, 'count':stake_count_idx.values})
plt.figure(figsize=(14,5))
ax= merged_data['stake'].value_counts().sort_index().plot(kind='bar', color= mat_color_list)
# print(ax.get_xticklabels())
# Turn off tick labels
# ax.set_yticklabels([])
# ax.set_xticklabels([])
axlist = []
for idx, val in zip(stake_count.sort_index().index, stake_count.sort_index().values):
if val > 100:
axlist.append(idx)
else:
axlist.append(None)
for p in ax.patches:
if (p.get_height()<100):
continue
pat = str(p.get_height())
ax.annotate(pat, (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xticklabels(axlist)
# hiding the ticks and markers
# ax.axes.get_xaxis().set_visible(False)
# ax.axes.get_yaxis().set_visible(False)
ax.tick_params(axis ='x', rotation = 70)
plt.title("Top stakes (repeated more than 100 times)")
plt.xlabel("stake")
plt.ylabel("count")
plt.show()
Observation :
Lets do more analysis.
#calculating 0-100th percentile to find a the correct percentile value for removal of outliers
for i in range(0,100,10):
var =merged_data["stake"].values
var = np.sort(var,axis = None)
print("{} percentile value is {}".format(i,var[int(len(var)*(float(i)/100))]))
print ("100 percentile value is ",var[-1])
0 percentile value is 9.0 10 percentile value is 135.0 20 percentile value is 250.0 30 percentile value is 500.0 40 percentile value is 800.0 50 percentile value is 2000.0 60 percentile value is 10000.0 70 percentile value is 10000.0 80 percentile value is 25000.0 90 percentile value is 50000.0 100 percentile value is 9300000.0
#looking further from the 99th percecntile
for i in range(90,100):
var =merged_data["stake"].values
var = np.sort(var,axis = None)
print("{} percentile value is {}".format(i,var[int(len(var)*(float(i)/100))]))
print ("100 percentile value is ",var[-1])
90 percentile value is 50000.0 91 percentile value is 60000.0 92 percentile value is 100000.0 93 percentile value is 100000.0 94 percentile value is 100000.0 95 percentile value is 113000.0 96 percentile value is 200000.0 97 percentile value is 200000.0 98 percentile value is 300000.0 99 percentile value is 500000.0 100 percentile value is 9300000.0
#pdf of trip-times after removing the outliers
sns.FacetGrid(merged_data,height=4) \
.map(sns.kdeplot,"stake") \
.add_legend()
plt.title("Distribution of stake")
plt.show();
#converting the values to log-values to chec for log-normal
import math
merged_data['stake_log']=[math.log(i) for i in merged_data['stake'].values]
describe(merged_data['stake_log'])
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| stake_log | 10066.000 | 7.854 | 2.419 | 2.197 | 5.991 | 7.601 | 9.903 | 16.046 | 2.118 | 0.204 | -0.916 |
pdf_cdf(merged_data, 'stake_log')
box_violin_plot(merged_data, 'stake_log')
#pdf of trip-times after removing the outliers
sns.FacetGrid(merged_data,height=4) \
.map(sns.kdeplot,"stake_log") \
.add_legend()
plt.title("Distribution of stake log")
plt.show();
qqPlot(merged_data, 'stake_log')
Observation :
# transform training data & save lambda value
fitted_data_stake, fitted_lambda_stake = stats.boxcox(merged_data['stake'] + 1) # Add 1 to be able to transform 0 values
# fitted_data_stake
# fitted_lambda_stake
# merged_data['stake']
# inv_boxcox(fitted_data_stake, fitted_lambda_stake)-1
merged_data['stake_boxcox'] = fitted_data_stake
describe(merged_data['stake_boxcox'])
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| stake_boxcox | 10066.000 | 6.485 | 1.665 | 2.183 | 5.229 | 6.400 | 7.931 | 11.297 | 1.462 | 0.038 | -0.998 |
pdf_cdf(merged_data, 'stake_boxcox')
box_violin_plot(merged_data, 'stake_boxcox')
histplot(merged_data, 'stake_boxcox')
qqPlot(merged_data, 'stake_boxcox')
describe(merged_data['betRate'])
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| betRate | 10066.000 | 2.452 | 13.855 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.951 | 44.571 | 2835.808 |
merged_data['betRate'].nunique()
258
# sns.histplot(data=merged_data, x='betRate', hue='status', kde=True)
box_violin_plot(merged_data, 'betRate')
# %%time
# histplot(merged_data, 'betRate')
count_plot(merged_data, 'betRate')
# describe(merged_data['betRate'])
# describe(np.log(merged_data['betRate']))
merged_data['betRate_log'] = np.log(merged_data['betRate'])
describe(merged_data[['betRate','betRate_log']]).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| betRate | 10066.000 | 2.452 | 13.855 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.951 | 44.571 | 2835.808 |
| betRate_log | 10066.000 | 0.425 | 0.515 | 0.010 | 0.166 | 0.336 | 0.513 | 6.908 | 0.270 | 4.969 | 33.523 |
# ax = sns.displot(np.log(merged_data['stake']))
ax = sns.displot(data=merged_data, x='betRate_log', hue='status', kde=True)
plt.title("Distribution Plot of stake log (betRate_log)")
plt.show()
# ax = sns.histplot(data=merged_data, x='stake_log', hue='status', kde=True)
qqPlot(merged_data, 'betRate_log')
# transform training data & save lambda value
fitted_data_betRate, fitted_lambda_betRate = stats.boxcox(merged_data['betRate'])
# fitted_data_betRate
# fitted_lambda_betRate
merged_data['betRate_boxcox'] = fitted_data_betRate
# describe(merged_data['betRate_boxcox'])
describe(merged_data[['betRate', 'betRate_log', 'betRate_boxcox']]).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| betRate | 10066.000 | 2.452 | 13.855 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.951 | 44.571 | 2835.808 |
| betRate_log | 10066.000 | 0.425 | 0.515 | 0.010 | 0.166 | 0.336 | 0.513 | 6.908 | 0.270 | 4.969 | 33.523 |
| betRate_boxcox | 10066.000 | 0.251 | 0.137 | 0.010 | 0.145 | 0.257 | 0.345 | 0.600 | 0.111 | 0.246 | -0.350 |
box_violin_plot(merged_data, 'betRate_boxcox')
# sns.distplot(fitted_data_betRate, hist = False, kde = True,
# kde_kws = {'shade': True, 'linewidth': 2},
# label = "Normal", color ="green", )
# sns.histplot(data=merged_data, x='betRate_boxcox', hue='status', kde=True)
histplot(merged_data,'betRate_boxcox', hue='status', kde=True)
qqPlot(merged_data, 'betRate_boxcox')
# merged_data['betRate'].quantile
merged_data['averagePriceMatched'].nunique()
726
describe(merged_data['averagePriceMatched'])
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| averagePriceMatched | 10066.000 | 2.466 | 13.969 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.977 | 43.882 | 2754.245 |
pdf_cdf(merged_data,'averagePriceMatched')
# %%time
# sns.displot(data=merged_data, x='averagePriceMatched', hue='status', kde=True)
# %%time
# sns.histplot(data=merged_data, x='averagePriceMatched', hue='status', kde=True)
# %%time
# sns.histplot(np.log(merged_data['averagePriceMatched']))
%%time
histplot(merged_data, 'averagePriceMatched')
merged_data['averagePriceMatched_log'] = np.log(merged_data['averagePriceMatched'])
describe(merged_data[['averagePriceMatched','averagePriceMatched_log']]).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| averagePriceMatched | 10066.000 | 2.466 | 13.969 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.977 | 43.882 | 2754.245 |
| averagePriceMatched_log | 10066.000 | 0.426 | 0.518 | 0.010 | 0.166 | 0.336 | 0.513 | 6.908 | 0.272 | 4.952 | 33.256 |
pdf_cdf(merged_data, 'averagePriceMatched_log')
histplot(merged_data, 'averagePriceMatched_log')
qqPlot(merged_data, 'averagePriceMatched_log')
# transform training data & save lambda value
fitted_data_averagePriceMatched, fitted_lambda_averagePriceMatched = stats.boxcox(merged_data['averagePriceMatched'])
# fitted_data_averagePriceMatched
# fitted_lambda_averagePriceMatched
merged_data['averagePriceMatched_boxcox'] = fitted_data_averagePriceMatched
describe(merged_data[['averagePriceMatched', 'averagePriceMatched_log','averagePriceMatched_boxcox']]).T
| count | mean | std | min | 25% | 50% | 75% | max | mad | skew | kurt | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| averagePriceMatched | 10066.000 | 2.466 | 13.969 | 1.010 | 1.180 | 1.400 | 1.670 | 1000.000 | 1.977 | 43.882 | 2754.245 |
| averagePriceMatched_log | 10066.000 | 0.426 | 0.518 | 0.010 | 0.166 | 0.336 | 0.513 | 6.908 | 0.272 | 4.952 | 33.256 |
| averagePriceMatched_boxcox | 10066.000 | 0.251 | 0.137 | 0.010 | 0.145 | 0.257 | 0.345 | 0.599 | 0.112 | 0.249 | -0.352 |
pdf_cdf(merged_data, 'averagePriceMatched_boxcox')
box_violin_plot(merged_data, 'averagePriceMatched_boxcox')
# sns.distplot(fitted_data_averagePriceMatched, hist = False, kde = True,
# kde_kws = {'shade': True, 'linewidth': 2},
# label = "Normal", color ="green", )
# sns.histplot(data=merged_data, x='averagePriceMatched_boxcox', hue='status', kde=True)
# sns.displot(data=merged_data, x='betRate_boxcox', hue='status', kde=True)
qqPlot(merged_data, 'averagePriceMatched_boxcox')
merged_data['averagePriceMatched_boxcox'].plot.kde()
<AxesSubplot:ylabel='Density'>
merged_data['placedDate']
0 2021-02-22T09:43:04.686Z
1 2021-02-22T09:46:05.458Z
2 2021-02-22T09:36:10.372Z
3 2021-02-22T09:35:32.772Z
4 2021-02-22T09:34:47.648Z
...
10061 2021-02-18T15:39:37.308Z
10062 2021-02-18T15:39:47.201Z
10063 2021-02-18T18:20:17.016Z
10064 2021-02-18T21:01:38.393Z
10065 2021-02-19T21:55:36.789Z
Name: placedDate, Length: 10066, dtype: object
merged_data['placedDate'].dtype
dtype('O')
now = pd.Timestamp('now')
merged_data['placedDate'] = pd.to_datetime(merged_data['placedDate'], format='%Y-%m-%dT%H:%M:%S.%fZ')
merged_data['placedDate'].dtype
dtype('<M8[ns]')
merged_data['placedDate']
0 2021-02-22 09:43:04.686
1 2021-02-22 09:46:05.458
2 2021-02-22 09:36:10.372
3 2021-02-22 09:35:32.772
4 2021-02-22 09:34:47.648
...
10061 2021-02-18 15:39:37.308
10062 2021-02-18 15:39:47.201
10063 2021-02-18 18:20:17.016
10064 2021-02-18 21:01:38.393
10065 2021-02-19 21:55:36.789
Name: placedDate, Length: 10066, dtype: datetime64[ns]
merged_data['placedDate'].nunique()
10063
merged_data['Date'] = merged_data['placedDate'].dt.date
merged_data['Date'].nunique()
27
plt.figure(figsize=(13,4))
ax = sns.scatterplot(data=merged_data, x="Date", y="status", hue='status', style='status')
plt.xticks(list(merged_data['Date'].value_counts().sort_index().index))
ax.tick_params(axis ='x', rotation = 80)
# ax.set_xticklabels(x_ticks, rotation=0, fontsize=8)
# ax.set_yticklabels(y_ticks, rotation=0, fontsize=8)
plt.title("Top stakes (repeated more than 100 times)")
plt.xlabel("stake")
plt.ylabel("count")
# plt.tight_layout()
plt.show()
# merged_data[merged_data['status']=='WINNER_DECLARED']['Date'].value_counts().sort_index()
# merged_data[merged_data['status']=='INVALID_BET']['Date'].value_counts().sort_index()
datei = pd.crosstab(merged_data['Date'],merged_data['status'])
datei.sort_values('WINNER_DECLARED', ascending=False).sort_index().T
| Date | 2020-11-02 | 2020-11-03 | 2020-11-05 | 2020-11-08 | 2020-12-01 | 2020-12-07 | 2020-12-08 | 2020-12-09 | 2020-12-10 | 2021-01-22 | 2021-01-27 | 2021-01-28 | 2021-01-30 | 2021-02-04 | 2021-02-05 | 2021-02-06 | 2021-02-08 | 2021-02-11 | 2021-02-12 | 2021-02-13 | 2021-02-14 | 2021-02-15 | 2021-02-18 | 2021-02-19 | 2021-02-20 | 2021-02-21 | 2021-02-22 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| status | |||||||||||||||||||||||||||
| INVALID_BET | 3 | 3 | 1 | 1 | 1 | 1 | 2 | 2 | 6 | 1 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 1 | 1 | 16 | 2 | 1 | 5 | 1 | 0 | 0 | 0 |
| WINNER_DECLARED | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2221 | 3193 | 3671 | 915 |
plt.figure(figsize=(14,6))
ax = merged_data['Date'].value_counts().sort_index().plot(kind='bar')
add_value_labels(ax)
plt.title('Barplot Date')
plt.xlabel('count')
plt.ylabel('Date')
plt.show()
merged_data.groupby(['Date'])['status'].value_counts().sort_index()
Date status
2020-11-02 INVALID_BET 3
2020-11-03 INVALID_BET 3
2020-11-05 INVALID_BET 1
2020-11-08 INVALID_BET 1
2020-12-01 INVALID_BET 1
2020-12-07 INVALID_BET 1
2020-12-08 INVALID_BET 2
2020-12-09 INVALID_BET 2
2020-12-10 INVALID_BET 6
2021-01-22 INVALID_BET 1
2021-01-27 INVALID_BET 1
2021-01-28 INVALID_BET 2
2021-01-30 INVALID_BET 3
2021-02-04 INVALID_BET 3
2021-02-05 INVALID_BET 3
2021-02-06 INVALID_BET 3
2021-02-08 INVALID_BET 3
2021-02-11 INVALID_BET 1
2021-02-12 INVALID_BET 1
2021-02-13 INVALID_BET 16
2021-02-14 INVALID_BET 2
2021-02-15 INVALID_BET 1
2021-02-18 INVALID_BET 5
2021-02-19 INVALID_BET 1
WINNER_DECLARED 2221
2021-02-20 WINNER_DECLARED 3193
2021-02-21 WINNER_DECLARED 3671
2021-02-22 WINNER_DECLARED 915
Name: status, dtype: int64
merged_data['time'] = merged_data['placedDate'].dt.time
merged_data['time']
0 09:43:04.686000
1 09:46:05.458000
2 09:36:10.372000
3 09:35:32.772000
4 09:34:47.648000
...
10061 15:39:37.308000
10062 15:39:47.201000
10063 18:20:17.016000
10064 21:01:38.393000
10065 21:55:36.789000
Name: time, Length: 10066, dtype: object
merged_data['time'].nunique()
10061
merged_data['hour'] = merged_data['placedDate'].dt.hour
merged_data['hour'].nunique()
24
plt.figure(figsize=(14,6))
ax = merged_data['hour'].value_counts().sort_index().plot(kind='bar')
plt.xticks(np.arange(0, 25, 1))
add_value_labels(ax)
plt.xticks(rotation=0)
plt.title("Bar Plot for Hour")
plt.xlabel('hour')
plt.ylabel('Count')
plt.show()
# crosstab_by_y(merged_data, 'hour', transposed=True)
crosstab_by_y_plot(merged_data, 'hour', transposed=True, figsize=(14,6), stacked=False, legend_out=False)
---------------------------- hour grouped by status Count ----------------------------
| hour | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| status | ||||||||||||||||||||||||
| INVALID_BET | 0 | 0 | 0 | 2 | 1 | 3 | 3 | 3 | 11 | 1 | 1 | 0 | 1 | 1 | 11 | 4 | 3 | 1 | 2 | 8 | 3 | 6 | 1 | 0 |
| WINNER_DECLARED | 8 | 20 | 13 | 14 | 52 | 118 | 302 | 226 | 809 | 798 | 705 | 439 | 595 | 592 | 816 | 960 | 785 | 770 | 622 | 461 | 434 | 300 | 107 | 54 |
merged_data['week'] = merged_data['placedDate'].dt.isocalendar().week
merged_data['week'].value_counts()
7 9092 8 915 6 23 50 11 5 9 45 8 4 6 49 1 3 1 Name: week, dtype: Int64
merged_data['week'].nunique()
9
plt.figure(figsize=(10,6))
ax = merged_data['week'].value_counts().sort_index().plot(kind='bar')
add_value_labels(ax)
plt.show()
merged_data['weekday'] = merged_data['placedDate'].dt.weekday
merged_data['weekday'].nunique()
7
# plt.figure(figsize=(14,6))
ax = merged_data['weekday'].value_counts().sort_index().plot(kind='bar')
add_value_labels(ax)
plt.show()
merged_data['day_name'] = merged_data['placedDate'].dt.day_name()
cats = [ 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
from pandas.api.types import CategoricalDtype
cat_type = CategoricalDtype(categories=cats, ordered=True)
merged_data['day_name'].dtype
dtype('O')
merged_data['day_name'] = merged_data['day_name'].astype(cat_type)
merged_data['day_name'].dtype
CategoricalDtype(categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], ordered=True)
# merged_data['day_name'].nunique()
# merged_data['day_name'].value_counts().sort_index()
count_plot(merged_data, 'day_name', sort_index=True, size=(10,6), rotation=0)
crosstab_by_y_plot(merged_data, 'day_name', transposed=True, figsize=(14,6), stacked=False, legend_out=False)
-------------------------------- day_name grouped by status Count --------------------------------
| day_name | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday | Sunday |
|---|---|---|---|---|---|---|---|
| status | |||||||
| INVALID_BET | 8 | 6 | 3 | 18 | 6 | 22 | 3 |
| WINNER_DECLARED | 915 | 0 | 0 | 0 | 2221 | 3193 | 3671 |
merged_data.head()
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | IP_version | Details | city | region | country | loc | org | postal | timezone | stake_log | stake_boxcox | betRate_log | betRate_boxcox | averagePriceMatched_log | averagePriceMatched_boxcox | Date | time | hour | week | weekday | day_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60337ca8cc80710087ff5d8c | 500.000 | BACK | 2021-02-22 09:43:04.686 | 8776882 | 1.020 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.020 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata | 6.215 | 5.397 | 0.020 | 0.019 | 0.020 | 0.019 | 2021-02-22 | 09:43:04.686000 | 9 | 8 | 0 | Monday |
| 1 | 60337d5dcc80710087ff5d90 | 2100.000 | LAY | 2021-02-22 09:46:05.458 | 8776882 | 1.010 | 1.180 | 49.36.123.125 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.010 | WINNER_DECLARED | 8776882 | IPv4 | ['Mumbai', 'Maharashtra', 'IN', '19.0728,72.8826', 'AS55836 Reliance Jio Infocomm Limited', '400... | Mumbai | Maharashtra | IN | 19.0728,72.8826 | AS55836 Reliance Jio Infocomm Limited | 400070 | Asia/Kolkata | 7.650 | 6.434 | 0.010 | 0.010 | 0.010 | 0.010 | 2021-02-22 | 09:46:05.458000 | 9 | 8 | 0 | Monday |
| 2 | 60337b0a13046300869b42c4 | 25000.000 | LAY | 2021-02-22 09:36:10.372 | 8776882 | 1.060 | 1.180 | 2405:201:25:d0aa:11b4:2e1c:9999:f32a | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.050 | WINNER_DECLARED | 8776882 | IPv6 | ['Airoli', 'Maharashtra', 'IN', '19.1167,72.9833', 'AS55836 Reliance Jio Infocomm Limited', '400... | Airoli | Maharashtra | IN | 19.1167,72.9833 | AS55836 Reliance Jio Infocomm Limited | 400701 | Asia/Kolkata | 10.127 | 8.071 | 0.058 | 0.056 | 0.049 | 0.047 | 2021-02-22 | 09:36:10.372000 | 9 | 8 | 0 | Monday |
| 3 | 60337ae4b0517a00b16380e8 | 300.000 | LAY | 2021-02-22 09:35:32.772 | 8776882 | 1.070 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.070 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata | 5.704 | 5.011 | 0.068 | 0.064 | 0.068 | 0.064 | 2021-02-22 | 09:35:32.772000 | 9 | 8 | 0 | Monday |
| 4 | 60337ab7f343e00049c69416 | 500.000 | BACK | 2021-02-22 09:34:47.648 | 8776882 | 1.080 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.080 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata | 6.215 | 5.397 | 0.077 | 0.072 | 0.077 | 0.072 | 2021-02-22 | 09:34:47.648000 | 9 | 8 | 0 | Monday |
missing_values_table(merged_data)
Dataframe has 37 columns. There are 9 columns that have missing values.
| Missing Values | % of Total Values | |
|---|---|---|
| postal | 714 | 7.100 |
| Details | 6 | 0.100 |
| city | 6 | 0.100 |
| region | 6 | 0.100 |
| country | 6 | 0.100 |
| loc | 6 | 0.100 |
| org | 6 | 0.100 |
| timezone | 6 | 0.100 |
| IP | 3 | 0.000 |
merged_data.to_csv('data_after_EDA.csv',index=False)
# %who
display_image("timeseries.png",width=2000)
# %%time
# sns.pairplot(merged_data, hue='status')
# plt.title('PairPlot')
# plt.show()
Compute pairwise correlation of columns, excluding NA/null values.
corr = merged_data.corr()
plt.figure(figsize=(10,7))
sns.heatmap(corr, cmap='Blues')
plt.show()
heading("Top 20 Absolute Correlations")
print(get_top_abs_correlations(corr, 20))
----------------------------
Top 20 Absolute Correlations
----------------------------
betRate_log averagePriceMatched_log 1.000
betRate_boxcox averagePriceMatched_boxcox 1.000
betRate averagePriceMatched 1.000
stake_log stake_boxcox 1.000
horse winnerId 0.997
betRate_boxcox averagePriceMatched_log 0.909
averagePriceMatched_log averagePriceMatched_boxcox 0.909
betRate_log betRate_boxcox 0.909
averagePriceMatched_boxcox 0.909
horse marketId 0.748
marketId winnerId 0.747
averagePriceMatched betRate_log 0.719
averagePriceMatched_log 0.718
betRate betRate_log 0.718
averagePriceMatched_log 0.717
stake stake_log 0.655
stake_boxcox 0.641
hour weekday 0.610
marketId weekday 0.519
winnerId betRate_log 0.499
dtype: float64
merged_data.head(4)
| _id | stake | type | placedDate | horse | betRate | marketId | IP | eventType | userName | selectionName | marketName | event | averagePriceMatched | status | winnerId | IP_version | Details | city | region | country | loc | org | postal | timezone | stake_log | stake_boxcox | betRate_log | betRate_boxcox | averagePriceMatched_log | averagePriceMatched_boxcox | Date | time | hour | week | weekday | day_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60337ca8cc80710087ff5d8c | 500.000 | BACK | 2021-02-22 09:43:04.686 | 8776882 | 1.020 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.020 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata | 6.215 | 5.397 | 0.020 | 0.019 | 0.020 | 0.019 | 2021-02-22 | 09:43:04.686000 | 9 | 8 | 0 | Monday |
| 1 | 60337d5dcc80710087ff5d90 | 2100.000 | LAY | 2021-02-22 09:46:05.458 | 8776882 | 1.010 | 1.180 | 49.36.123.125 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.010 | WINNER_DECLARED | 8776882 | IPv4 | ['Mumbai', 'Maharashtra', 'IN', '19.0728,72.8826', 'AS55836 Reliance Jio Infocomm Limited', '400... | Mumbai | Maharashtra | IN | 19.0728,72.8826 | AS55836 Reliance Jio Infocomm Limited | 400070 | Asia/Kolkata | 7.650 | 6.434 | 0.010 | 0.010 | 0.010 | 0.010 | 2021-02-22 | 09:46:05.458000 | 9 | 8 | 0 | Monday |
| 2 | 60337b0a13046300869b42c4 | 25000.000 | LAY | 2021-02-22 09:36:10.372 | 8776882 | 1.060 | 1.180 | 2405:201:25:d0aa:11b4:2e1c:9999:f32a | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.050 | WINNER_DECLARED | 8776882 | IPv6 | ['Airoli', 'Maharashtra', 'IN', '19.1167,72.9833', 'AS55836 Reliance Jio Infocomm Limited', '400... | Airoli | Maharashtra | IN | 19.1167,72.9833 | AS55836 Reliance Jio Infocomm Limited | 400701 | Asia/Kolkata | 10.127 | 8.071 | 0.058 | 0.056 | 0.049 | 0.047 | 2021-02-22 | 09:36:10.372000 | 9 | 8 | 0 | Monday |
| 3 | 60337ae4b0517a00b16380e8 | 300.000 | LAY | 2021-02-22 09:35:32.772 | 8776882 | 1.070 | 1.180 | 2401:4900:30e5:71f1:a86c:d186:5d52:3fa8 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 1.070 | WINNER_DECLARED | 8776882 | IPv6 | ['Delhi', 'Delhi', 'IN', '28.6519,77.2315', 'AS45609 Bharti Airtel Ltd. AS for GPRS Service', '1... | Delhi | Delhi | IN | 28.6519,77.2315 | AS45609 Bharti Airtel Ltd. AS for GPRS Service | 110001 | Asia/Kolkata | 5.704 | 5.011 | 0.068 | 0.064 | 0.068 | 0.064 | 2021-02-22 | 09:35:32.772000 | 9 | 8 | 0 | Monday |
features = list(merged_data.columns.values)
merged_data.nunique()
_id 10066 stake 376 type 2 placedDate 10063 horse 319 betRate 258 marketId 390 IP 1070 eventType 3 userName 410 selectionName 318 marketName 10 event 245 averagePriceMatched 726 status 2 winnerId 195 IP_version 3 Details 222 city 85 region 30 country 9 loc 105 org 102 postal 103 timezone 10 stake_log 376 stake_boxcox 376 betRate_log 258 betRate_boxcox 258 averagePriceMatched_log 721 averagePriceMatched_boxcox 714 Date 27 time 10061 hour 24 week 9 weekday 7 day_name 7 dtype: int64
from sklearn.feature_selection import SelectKBest,f_classif,chi2
merged_data.columns.values
array(['_id', 'stake', 'type', 'placedDate', 'horse', 'betRate',
'marketId', 'IP', 'eventType', 'userName', 'selectionName',
'marketName', 'event', 'averagePriceMatched', 'status', 'winnerId',
'IP_version', 'Details', 'city', 'region', 'country', 'loc', 'org',
'postal', 'timezone', 'stake_log', 'stake_boxcox', 'betRate_log',
'betRate_boxcox', 'averagePriceMatched_log',
'averagePriceMatched_boxcox', 'Date', 'time', 'hour', 'week',
'weekday', 'day_name', 'Latitude', 'Longitude'], dtype=object)
numerical_fet = ['stake', 'stake_log', 'stake_boxcox', 'betRate', 'betRate_log', 'betRate_boxcox',
'marketId', 'averagePriceMatched', 'averagePriceMatched_log', 'averagePriceMatched_boxcox']
categorical_fet = ['type', 'horse', 'eventType', 'userName', 'selectionName', 'marketName', 'event', 'winnerId',
'IP_version', 'city', 'region', 'country', 'org', 'timezone',
'Date', 'hour', 'week', 'weekday']
dependent_fet = 'status'
merged_data[numerical_fet].head()
| stake | stake_log | stake_boxcox | betRate | betRate_log | betRate_boxcox | marketId | averagePriceMatched | averagePriceMatched_log | averagePriceMatched_boxcox | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 500.000 | 6.215 | 5.397 | 1.020 | 0.020 | 0.019 | 1.180 | 1.020 | 0.020 | 0.019 |
| 1 | 2100.000 | 7.650 | 6.434 | 1.010 | 0.010 | 0.010 | 1.180 | 1.010 | 0.010 | 0.010 |
| 2 | 25000.000 | 10.127 | 8.071 | 1.060 | 0.058 | 0.056 | 1.180 | 1.050 | 0.049 | 0.047 |
| 3 | 300.000 | 5.704 | 5.011 | 1.070 | 0.068 | 0.064 | 1.180 | 1.070 | 0.068 | 0.064 |
| 4 | 500.000 | 6.215 | 5.397 | 1.080 | 0.077 | 0.072 | 1.180 | 1.080 | 0.077 | 0.072 |
merged_data[categorical_fet].head(3)
| type | horse | eventType | userName | selectionName | marketName | event | winnerId | IP_version | city | region | country | org | timezone | Date | hour | week | weekday | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BACK | 8776882 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv6 | Delhi | Delhi | IN | AS45609 Bharti Airtel Ltd. AS for GPRS Service | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 |
| 1 | LAY | 8776882 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv4 | Mumbai | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 |
| 2 | LAY | 8776882 | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv6 | Airoli | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 |
missing_values_table(merged_data[categorical_fet])
Dataframe has 18 columns. There are 5 columns that have missing values.
| Missing Values | % of Total Values | |
|---|---|---|
| city | 6 | 0.100 |
| region | 6 | 0.100 |
| country | 6 | 0.100 |
| org | 6 | 0.100 |
| timezone | 6 | 0.100 |
new_df = pd.concat([merged_data[numerical_fet], merged_data[categorical_fet], merged_data[dependent_fet]], axis=1)
new_df.head(3)
| stake | stake_log | stake_boxcox | betRate | betRate_log | betRate_boxcox | marketId | averagePriceMatched | averagePriceMatched_log | averagePriceMatched_boxcox | type | horse | eventType | userName | selectionName | marketName | event | winnerId | IP_version | city | region | country | org | timezone | Date | hour | week | weekday | status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 500.000 | 6.215 | 5.397 | 1.020 | 0.020 | 0.019 | 1.180 | 1.020 | 0.020 | 0.019 | BACK | 8776882 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv6 | Delhi | Delhi | IN | AS45609 Bharti Airtel Ltd. AS for GPRS Service | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 | WINNER_DECLARED |
| 1 | 2100.000 | 7.650 | 6.434 | 1.010 | 0.010 | 0.010 | 1.180 | 1.010 | 0.010 | 0.010 | LAY | 8776882 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv4 | Mumbai | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 | WINNER_DECLARED |
| 2 | 25000.000 | 10.127 | 8.071 | 1.060 | 0.058 | 0.056 | 1.180 | 1.050 | 0.049 | 0.047 | LAY | 8776882 | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv6 | Airoli | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 | WINNER_DECLARED |
missing_values_table(new_df)
Dataframe has 29 columns. There are 5 columns that have missing values.
| Missing Values | % of Total Values | |
|---|---|---|
| city | 6 | 0.100 |
| region | 6 | 0.100 |
| country | 6 | 0.100 |
| org | 6 | 0.100 |
| timezone | 6 | 0.100 |
new_df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 10066 entries, 0 to 10065 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 stake 10066 non-null float64 1 stake_log 10066 non-null float64 2 stake_boxcox 10066 non-null float64 3 betRate 10066 non-null float64 4 betRate_log 10066 non-null float64 5 betRate_boxcox 10066 non-null float64 6 marketId 10066 non-null float64 7 averagePriceMatched 10066 non-null float64 8 averagePriceMatched_log 10066 non-null float64 9 averagePriceMatched_boxcox 10066 non-null float64 10 type 10066 non-null object 11 horse 10066 non-null int64 12 eventType 10066 non-null object 13 userName 10066 non-null object 14 selectionName 10066 non-null object 15 marketName 10066 non-null object 16 event 10066 non-null object 17 winnerId 10066 non-null int64 18 IP_version 10066 non-null object 19 city 10060 non-null object 20 region 10060 non-null object 21 country 10060 non-null object 22 org 10060 non-null object 23 timezone 10060 non-null object 24 Date 10066 non-null object 25 hour 10066 non-null int64 26 week 10066 non-null UInt32 27 weekday 10066 non-null int64 28 status 10066 non-null object dtypes: UInt32(1), float64(10), int64(4), object(14) memory usage: 2.6+ MB
print(DataFrameImputer.__doc__)
Impute missing values.
Columns of dtype object are imputed with the most frequent value (mode)
in column.
Columns of other types are imputed with mean of column.
data = DataFrameImputer().fit_transform(new_df)
data.isnull().sum().any()
False
data.head(3)
| stake | stake_log | stake_boxcox | betRate | betRate_log | betRate_boxcox | marketId | averagePriceMatched | averagePriceMatched_log | averagePriceMatched_boxcox | type | horse | eventType | userName | selectionName | marketName | event | winnerId | IP_version | city | region | country | org | timezone | Date | hour | week | weekday | status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 500.000 | 6.215 | 5.397 | 1.020 | 0.020 | 0.019 | 1.180 | 1.020 | 0.020 | 0.019 | BACK | 8776882 | Tennis | brovinn | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv6 | Delhi | Delhi | IN | AS45609 Bharti Airtel Ltd. AS for GPRS Service | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 | WINNER_DECLARED |
| 1 | 2100.000 | 7.650 | 6.434 | 1.010 | 0.010 | 0.010 | 1.180 | 1.010 | 0.010 | 0.010 | LAY | 8776882 | Tennis | aksash111 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv4 | Mumbai | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 | WINNER_DECLARED |
| 2 | 25000.000 | 10.127 | 8.071 | 1.060 | 0.058 | 0.056 | 1.180 | 1.050 | 0.049 | 0.047 | LAY | 8776882 | Tennis | pinka2 | Danielle Rose Collins | Match Odds | Saisai Zheng v Danielle Rose Collins | 8776882 | IPv6 | Airoli | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-22 | 9 | 8 | 0 | WINNER_DECLARED |
data.shape
(10066, 29)
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, test_size=0.3, stratify=data['status'])
print(train.shape)
print(test.shape)
(7046, 29) (3020, 29)
train['status'].value_counts()
WINNER_DECLARED 7000 INVALID_BET 46 Name: status, dtype: int64
test['status'].value_counts()
WINNER_DECLARED 3000 INVALID_BET 20 Name: status, dtype: int64
def pie_labeling_train(x):
print(x)
return '{:.4f}%\n(#{:.0f})'.format(x, sums_train.values.sum()*x/100)
def pie_labeling_test(x):
print(x)
return '{:.4f}%\n(#{:.0f})'.format(x, sums_test.values.sum()*x/100)
pie_labeling(10)
10
'10.0000%\n(#1007)'
fig = plt.figure(figsize=(24,10))
#this line will produce a figure which has 2 row
#and 4 columns
#(0, 0) specifies the left upper coordinate of your plot
# ax1 = plt.subplot2grid((2,4),(0,0))
# sums_train = train['status'].value_counts()
# plt.pie(sums_train, labels=sums_train.index,autopct=pie_labeling_train, pctdistance=1.3, labeldistance=1.6)
# plt.title('Train Dependent Variable ("status") distribution')
ax1 = plt.subplot2grid((2,4),(0,0))
sums_train = train['status'].value_counts()
plt.pie(sums_train, labels=['WINNER_DECLARED', 'INVALID_BET'],autopct=pie_labeling_train, pctdistance=1.3, labeldistance=1.6)
plt.title('Train Dependent Variable ("status") distribution')
# #next one
# ax1 = plt.subplot2grid((2, 4), (0, 1))
# sums_test = test['status'].value_counts()
# plt.pie(sums_test, labels=sums_test.index,autopct=pie_labeling_test, pctdistance=1.3, labeldistance=1.6)
# plt.title('Test Dependent Variable ("status") distribution')
#next one
ax1 = plt.subplot2grid((2, 4), (0, 1))
sums_test = test['status'].value_counts()
plt.pie(sums_test, labels=['WINNER_DECLARED', 'INVALID_BET'],autopct=pie_labeling_test, pctdistance=1.3, labeldistance=1.6)
plt.title('Test Dependent Variable ("status") distribution')
plt.tight_layout()
plt.show()
99.34714436531067 0.652852701023221 99.33775067329407 0.6622516550123692
train = train.replace({'status': {'WINNER_DECLARED':0, 'INVALID_BET':1}})
test = test.replace({'status': {'WINNER_DECLARED':0, 'INVALID_BET':1}})
y_train = train['status'].copy()
X_train = train.drop("status",axis=1).copy()
print(X_train.shape)
print(y_train.shape)
(7046, 28) (7046,)
X_train.columns
Index(['stake', 'stake_log', 'stake_boxcox', 'betRate', 'betRate_log', 'betRate_boxcox', 'marketId', 'averagePriceMatched', 'averagePriceMatched_log', 'averagePriceMatched_boxcox', 'type', 'horse', 'eventType', 'userName', 'selectionName', 'marketName', 'event', 'winnerId', 'IP_version', 'city', 'region', 'country', 'org', 'timezone', 'Date', 'hour', 'week', 'weekday'], dtype='object')
y_test=test['status'].copy()
X_test=test.drop("status",axis=1).copy()
print(X_test.shape)
print(y_test.shape)
(3020, 28) (3020,)
for i in train.columns:
print("{:<40}{:>20}".format(i,train[i].nunique()))
stake 318 stake_log 318 stake_boxcox 318 betRate 248 betRate_log 248 betRate_boxcox 248 marketId 357 averagePriceMatched 576 averagePriceMatched_log 572 averagePriceMatched_boxcox 566 type 2 horse 290 eventType 3 userName 392 selectionName 289 marketName 9 event 228 winnerId 184 IP_version 3 city 82 region 30 country 9 org 98 timezone 10 Date 23 hour 24 week 9 weekday 7 status 2
X_train_numerical=X_train[numerical_fet].copy()
X_test_numerical=X_test[numerical_fet].copy()
from sklearn.preprocessing import StandardScaler,OneHotEncoder , LabelEncoder ,normalize
scaler = StandardScaler()
scaler.fit(X_train_numerical)
X_train_numerical_std = scaler.transform(X_train_numerical)
X_test_numerical_std = scaler.transform(X_test_numerical)
X_train_numerical_std=pd.DataFrame(X_train_numerical_std,columns=numerical_fet)
X_test_numerical_std=pd.DataFrame(X_test_numerical_std,columns=numerical_fet)
# Checking the values after converting
X_train_numerical_std.head()
| stake | stake_log | stake_boxcox | betRate | betRate_log | betRate_boxcox | marketId | averagePriceMatched | averagePriceMatched_log | averagePriceMatched_boxcox | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.202 | -0.682 | -0.659 | -0.106 | -0.121 | 0.157 | 0.756 | -0.106 | -0.135 | 0.130 |
| 1 | -0.202 | -0.725 | -0.706 | -0.028 | 0.598 | 1.246 | 0.061 | -0.029 | 0.594 | 1.244 |
| 2 | -0.202 | -0.774 | -0.759 | -0.151 | -0.701 | -1.366 | 0.826 | -0.148 | -0.699 | -1.362 |
| 3 | -0.094 | 0.796 | 0.823 | -0.098 | -0.041 | 0.313 | 0.745 | -0.097 | -0.042 | 0.314 |
| 4 | -0.192 | -0.110 | -0.056 | -0.062 | 0.316 | 0.895 | -0.098 | -0.062 | 0.313 | 0.894 |
print("Shape of Standardized X_train: ",X_train_numerical_std.shape)
print("Shape of Standardized X_test: ",X_test_numerical_std.shape)
Shape of Standardized X_train: (7046, 10) Shape of Standardized X_test: (3020, 10)
X_train_categorical=X_train[categorical_fet].copy()
X_test_categorical=X_test[categorical_fet].copy()
X_train_categorical.head()
| type | horse | eventType | userName | selectionName | marketName | event | winnerId | IP_version | city | region | country | org | timezone | Date | hour | week | weekday | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8589 | LAY | 4859354 | Cricket | tgtg | Leeward Islands | Match Odds | Leeward Islands v Jamaica | 2312313 | IPv4 | Jaipur | Rajasthan | IN | AS45609 Bharti Airtel Ltd. AS for GPRS Service | Asia/Kolkata | 2021-02-19 | 16 | 7 | 4 |
| 5304 | BACK | 47972 | Soccer | joy111 | Under 2.5 Goals | Over/Under 2.5 Goals | Liverpool v Everton | 47972 | IPv4 | Davorlim | Goa | IN | AS9829 National Internet Backbone | Asia/Kolkata | 2021-02-20 | 18 | 7 | 5 |
| 4645 | BACK | 2810072 | Cricket | aksash111 | Trinidad & Tobago | Match Odds | Guyana v Trinidad & Tobago | 2810072 | IPv4 | Mumbai | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-20 | 22 | 7 | 5 |
| 9393 | BACK | 9128710 | Tennis | pinka2 | Adrian Menendez-Maceira | Match Odds | L Broady v Menendez-Maceiras | 5626816 | IPv6 | Airoli | Maharashtra | IN | AS55836 Reliance Jio Infocomm Limited | Asia/Kolkata | 2021-02-19 | 11 | 7 | 4 |
| 9258 | LAY | 4294273 | Cricket | pu01 | Dolphins | Match Odds | Dolphins v Cape Cobras | 4294273 | IPv6 | Ahmedabad | Gujarat | IN | AS45609 Bharti Airtel Ltd. AS for GPRS Service | Asia/Kolkata | 2021-02-19 | 12 | 7 | 4 |
onehot_encoder = OneHotEncoder(sparse=False, handle_unknown = 'ignore')
X_train_categorical_encoded = onehot_encoder.fit(X_train_categorical)
X_train_categorical_encoded = onehot_encoder.transform(X_train_categorical)
X_test_categorical_encoded = onehot_encoder.transform(X_test_categorical)
# Checking the Encoded Data
X_train_categorical_encoded
array([[0., 1., 0., ..., 1., 0., 0.],
[1., 0., 0., ..., 0., 1., 0.],
[1., 0., 0., ..., 0., 1., 0.],
...,
[0., 1., 0., ..., 0., 1., 0.],
[1., 0., 0., ..., 0., 0., 1.],
[1., 0., 0., ..., 0., 1., 0.]])
print("X_train after One Hot Encoding: ",X_train_categorical_encoded.shape)
print("X_test after One Hot Encoding: ",X_test_categorical_encoded.shape)
X_train after One Hot Encoding: (7046, 1692) X_test after One Hot Encoding: (3020, 1692)
## Obtaining Feature Names from the Classifier
# list(onehot_encoder.get_feature_names(categorical_fet))
encodedCatColumnNames = list(onehot_encoder.get_feature_names(categorical_fet))
X_train_categorical_encoded=pd.DataFrame(X_train_categorical_encoded,columns=encodedCatColumnNames)
X_test_categorical_encoded=pd.DataFrame(X_test_categorical_encoded,columns=encodedCatColumnNames)
X_train_categorical_encoded.head(3)
| type_BACK | type_LAY | horse_235 | horse_448 | horse_1117 | horse_1703 | horse_2426 | horse_2685 | horse_7407 | horse_7461 | horse_7659 | horse_9162 | horse_9163 | horse_10501 | horse_10761 | horse_10774 | horse_10779 | horse_13360 | horse_14072 | horse_16606 | horse_28191 | horse_28214 | horse_28220 | horse_28223 | horse_37302 | horse_37303 | horse_41433 | horse_44503 | horse_44504 | horse_44507 | horse_44508 | horse_44518 | horse_44519 | horse_44521 | horse_44526 | horse_44785 | horse_44787 | horse_44790 | horse_44793 | horse_44794 | horse_44795 | horse_44797 | horse_44798 | horse_44800 | horse_46726 | horse_47972 | horse_47973 | horse_47998 | horse_47999 | horse_48043 | horse_48044 | horse_48224 | horse_48351 | horse_48451 | horse_48461 | horse_48470 | horse_48756 | horse_48759 | horse_48783 | horse_48785 | horse_48786 | horse_48787 | horse_48793 | horse_48799 | horse_50347 | horse_50349 | horse_55190 | horse_55223 | horse_55243 | horse_55264 | horse_55270 | horse_55271 | horse_56036 | horse_56299 | horse_56301 | horse_56323 | horse_56343 | horse_56363 | horse_56764 | horse_56966 | horse_56967 | horse_58805 | horse_58943 | horse_59044 | horse_60294 | horse_60295 | horse_60297 | horse_60303 | horse_60310 | horse_60443 | horse_62683 | horse_63347 | horse_64374 | horse_64964 | horse_65352 | horse_65778 | horse_66183 | horse_67143 | horse_69718 | horse_69720 | horse_70385 | horse_70468 | horse_77586 | horse_78864 | horse_79323 | horse_79343 | horse_84649 | horse_86359 | horse_113123 | horse_113125 | horse_113187 | horse_113191 | horse_113239 | horse_121724 | horse_127991 | horse_191604 | horse_191607 | horse_198136 | horse_198138 | horse_199184 | horse_199545 | horse_201327 | horse_208035 | horse_214865 | horse_215817 | horse_215821 | horse_215829 | horse_247969 | horse_259394 | horse_269792 | horse_298233 | horse_309111 | horse_309687 | horse_309689 | horse_347774 | horse_350594 | horse_361329 | horse_361706 | horse_419126 | horse_476499 | horse_482032 | horse_489720 | horse_495321 | horse_498560 | horse_501200 | horse_505726 | horse_522046 | horse_522049 | horse_522054 | horse_571273 | horse_674742 | horse_676464 | horse_676465 | horse_676467 | horse_924268 | horse_965417 | horse_968185 | horse_1029663 | horse_1088499 | horse_1205121 | horse_1205126 | horse_1221385 | horse_1221386 | horse_1222344 | horse_1222345 | horse_1222346 | horse_1222347 | horse_1254317 | horse_1485567 | horse_1485568 | horse_1557297 | horse_2009654 | horse_2013140 | horse_2047448 | horse_2080735 | horse_2081063 | horse_2249229 | horse_2250259 | horse_2250353 | horse_2255452 | horse_2257536 | horse_2263603 | horse_2263634 | horse_2312313 | horse_2312315 | horse_2469649 | horse_2487036 | horse_2506293 | horse_2542449 | horse_2810072 | horse_3158851 | horse_3186303 | horse_3237590 | horse_3258153 | horse_3809606 | horse_3954225 | horse_4294272 | horse_4294273 | horse_4297012 | horse_4729711 | horse_4822931 | horse_4855758 | horse_4859354 | horse_4943786 | horse_5045297 | horse_5071877 | horse_5168454 | horse_5304142 | horse_5340398 | horse_5626816 | horse_5774350 | horse_5851482 | horse_5851483 | horse_5875376 | horse_6516913 | horse_6555433 | horse_6847357 | horse_7414058 | horse_7418999 | horse_7445660 | horse_7594131 | horse_7640637 | horse_7671296 | horse_7797904 | horse_7928242 | horse_8173434 | horse_8196374 | horse_8226987 | horse_8243874 | horse_8257797 | horse_8258569 | horse_8326752 | horse_8443097 | horse_8444055 | horse_8587663 | horse_8698678 | horse_8700174 | horse_8750569 | horse_8776882 | horse_8781581 | horse_8784966 | horse_8838645 | horse_8842202 | horse_8842295 | horse_8859238 | horse_8908192 | horse_9128710 | horse_9193006 | horse_9198585 | horse_9220660 | ... | city_Mohali | city_Mumbai | city_Mysore | city_Najafgarh | city_New Delhi | city_Noida | city_North Charleston | city_Panaji | city_Panchkula | city_Pimpri | city_Powai | city_Pune | city_Ranchi | city_Ras Al Khaimah City | city_Reading | city_Rohtak | city_Rājkot | city_Saint Helier | city_Saint Peter Port | city_Shāhāda | city_Singapore | city_Solāpur | city_Songadh | city_Sonīpat | city_Surat | city_Sūjāngarh | city_Tarn Tāran | city_Thrissur | city_Thāne | city_Tumkūr | city_Udaipur | city_Ullal | city_Vagator | city_Valsād | city_Virār | city_Visakhapatnam | city_Visnagar | city_Wardha | city_Yamunānagar | city_Ābu Road | region_Andhra Pradesh | region_Chandigarh | region_Delhi | region_Dubai | region_England | region_Goa | region_Gujarat | region_Haryana | region_Himachal Pradesh | region_Jharkhand | region_Karnataka | region_Kerala | region_Maharashtra | region_Manipur | region_North Brabant | region_Punjab | region_Rajasthan | region_Raʼs al Khaymah | region_Scotland | region_Singapore | region_South Carolina | region_St Helier | region_St Peter Port | region_Tamil Nadu | region_Telangana | region_Texas | region_Uttar Pradesh | region_Uttarakhand | region_Victoria | region_West Bengal | country_AE | country_AU | country_GB | country_GG | country_IN | country_JE | country_NL | country_SG | country_US | org_AS10029 SHYAM SPECTRA PVT LTD | org_AS11427 Charter Communications Inc | org_AS1221 Telstra Corporation Ltd | org_AS12576 EE Limited | org_AS131269 ACTFIBERNET Pvt Ltd | org_AS132116 Ani Network Pvt Ltd | org_AS13213 UK-2 Limited | org_AS132296 Seven Star Digital Network Private Limited | org_AS132453 TRIPLE PLAY BROADBAND PRIVATE LIMITED | org_AS132497 DIGITAL NETWORK ASSOCIATES PRIVATE LIMITED | org_AS132771 Fibre Air Services Private Limited | org_AS132996 Threesa Infoway Pvt.Ltd. | org_AS133243 Weblink Infoways Private Limited | org_AS133275 Gigantic Infotel Pvt Ltd | org_AS133296 Web Werks India Pvt. Ltd. | org_AS133311 Maxtech Broadband Pvt Ltd | org_AS133648 MNR Broadband Services Pvt. Ltd. | org_AS133661 Netplus Broadband Services Private Limited | org_AS133696 Fastway Transmission Private Limited | org_AS133715 YPT Entertainment House Pvt Ltd | org_AS133720 SOFT CALL CUST-O-CARE PRIVATE LIMITED | org_AS133982 Excitel Broadband Private Limited | org_AS134014 NET 4 U SERVICES PVT LTD | org_AS134022 Genstar Network Solutions Pvt Ltd. | org_AS134040 Vayutel Technology Services Private Limited | org_AS134259 Skyne | org_AS134312 Digi Win Infotainment Mumbai Pvt Ltd | org_AS134341 jdm broadband services pvt ltd | org_AS134674 TATA SKY BROADBAND PRIVATE LIMITED | org_AS134858 iForce Networks | org_AS134884 ARICHWAL IT SERVICES PRIVATE LIMITED | org_AS134886 Super Sonic Broadband Pvt Ltd | org_AS134937 Speed Communicaion | org_AS135239 Sonali Internet Services Pvt Ltd | org_AS135705 Nas Broadband Pvt Ltd | org_AS135718 DISHAWAVES INFONET PVT. LTD | org_AS135738 Adn Broadband | org_AS135776 Inxssinfo mkrt ser pvt ltd. | org_AS135817 ESTO MEDIA PRIVATE LIMITED | org_AS135836 Durga Broadband Pvt Ltd | org_AS135851 Excogitate Technologies Pvt Ltd | org_AS135854 Ruhban Telecommunication Private Limited | org_AS136334 Vortex Netsol Private Limited | org_AS136375 Chl Technology | org_AS136946 Weebo networks Pvt Ltd | org_AS137134 Triple Play Broadband Services India Pvt Ltd | org_AS137627 Nextel Communications India Pvt Ltd | org_AS138272 Hi5 Multimedia Services Pvt Ltd | org_AS138296 Juweriyah Networks Private Limited | org_AS14061 DigitalOcean, LLC | org_AS141257 Om Computer World | org_AS141271 Kay Powernet Services Pvt Ltd | org_AS15169 Google LLC | org_AS15802 Emirates Integrated Telecommunications Company PJSC | org_AS16509 Amazon.com, Inc. | org_AS17488 Hathway IP Over Cable Internet | org_AS17665 AS Number of Indusind Media and communication Ltd. | org_AS17747 SITI NETWORKS LIMITED | org_AS17762 Tata Teleservices Maharashtra Ltd | org_AS17917 Quadrant Televentures Limited | org_AS18196 Seven Star Internet Service Provider | org_AS18207 YOU Broadband & Cable India Ltd. | org_AS18209 Atria Convergence Technologies pvt ltd | org_AS199524 G-Core Labs S.A. | org_AS20057 AT&T Mobility LLC | org_AS21859 Zenlayer Inc | org_AS24186 RailTel Corporation of India Ltd | org_AS24554 Fivenetwork Solution India Pvt Ltd Internet | org_AS24560 Bharti Airtel Ltd., Telemedia Services | org_AS25135 Vodafone Limited | org_AS2856 British Telecommunications PLC | org_AS36351 SoftLayer Technologies Inc. | org_AS38207 Rajesh Multi Channel Pvt Ltd. | org_AS38266 Vodafone India Ltd. | org_AS38457 Honesty Net Solution (I) Pvt Ltd | org_AS41564 Packet Exchange Limited | org_AS45184 Den Digital Entertainment Pvt. Ltd. AS ISP india | org_AS45194 Syscon Infoway Pvt. Ltd. | org_AS45271 Idea Cellular Limited | org_AS45528 Tikona Infinet Ltd. | org_AS45609 Bharti Airtel Ltd. AS for GPRS Service | org_AS45769 D-Vois Broadband Pvt Ltd | org_AS45916 Gujarat Telelink Pvt Ltd | org_AS5384 Emirates Telecommunications Corporation | org_AS55352 Microscan Computers Private Limited | org_AS55832 HOME SYSTEMS PVT.LTD | org_AS55836 Reliance Jio Infocomm Limited | org_AS55862 Wan & Lan Internet Pvt Ltd | org_AS55947 Bangalore Broadband Network Pvt Ltd | org_AS58405 UNITED TELECOMS LIMITED | org_AS58678 Intech Online Private Limited | org_AS58762 candor infosolution Pvt Ltd | org_AS59185 NETRUN TECHNOLOGIES PVT LTD | org_AS62240 Clouvider Limited | org_AS8680 Sure (Guernsey) Limited | org_AS8681 JT (Jersey) Limited | org_AS9009 M247 Ltd | org_AS9829 National Internet Backbone | timezone_America/Chicago | timezone_America/New_York | timezone_Asia/Dubai | timezone_Asia/Kolkata | timezone_Asia/Singapore | timezone_Australia/Melbourne | timezone_Europe/Amsterdam | timezone_Europe/Guernsey | timezone_Europe/Jersey | timezone_Europe/London | Date_2020-11-02 | Date_2020-11-03 | Date_2020-11-05 | Date_2020-12-01 | Date_2020-12-07 | Date_2020-12-08 | Date_2020-12-09 | Date_2020-12-10 | Date_2021-01-22 | Date_2021-01-28 | Date_2021-01-30 | Date_2021-02-04 | Date_2021-02-05 | Date_2021-02-06 | Date_2021-02-08 | Date_2021-02-12 | Date_2021-02-13 | Date_2021-02-14 | Date_2021-02-18 | Date_2021-02-19 | Date_2021-02-20 | Date_2021-02-21 | Date_2021-02-22 | hour_0 | hour_1 | hour_2 | hour_3 | hour_4 | hour_5 | hour_6 | hour_7 | hour_8 | hour_9 | hour_10 | hour_11 | hour_12 | hour_13 | hour_14 | hour_15 | hour_16 | hour_17 | hour_18 | hour_19 | hour_20 | hour_21 | hour_22 | hour_23 | week_3 | week_4 | week_5 | week_6 | week_7 | week_8 | week_45 | week_49 | week_50 | weekday_0 | weekday_1 | weekday_2 | weekday_3 | weekday_4 | weekday_5 | weekday_6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| 1 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 |
| 2 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 |
3 rows × 1692 columns
from sklearn.feature_selection import SelectKBest,f_classif,chi2
n = SelectKBest(score_func=f_classif,k=10)
numcols = n.fit(X_train_numerical_std,y_train)
X_train_numerical_std.shape
(7046, 10)
# list(X_train_numerical_std.columns.values)
# numerical_fet
# numerical_fet = list(X_train_numerical_std.columns.values)
top_fet={}
## https://machinelearningmastery.com/feature-selection-with-categorical-data/
for i in range(len(n.scores_)):
top_fet[numerical_fet[i]]=n.scores_[i]
top_fet = sorted(top_fet.items(), key=lambda x: x[1],reverse=True)
top_fet = dict(top_fet)
for key, value in top_fet.items():
print('{:<40}|{:>20}'.format(key, value))
marketId | 124.10511415611916 averagePriceMatched_boxcox | 61.772727770114564 averagePriceMatched_log | 61.19977129470197 betRate_boxcox | 60.86421484693711 betRate_log | 57.58078639245812 stake_boxcox | 14.713196568098832 stake_log | 14.026465769436085 averagePriceMatched | 11.03380991476286 betRate | 10.85073875465107 stake | 2.5723177706491747
plt.figure(figsize=(10, 6))
sns.barplot(x=numcols.scores_, y=numerical_fet, color = 'b')
plt.title('Best Numerical Features')
plt.show()
c = SelectKBest(score_func=chi2)
numcols=c.fit(X_train_categorical_encoded,y_train)
top_fet={}
## https://machinelearningmastery.com/feature-selection-with-categorical-data/
for i in range(len(c.scores_)):
top_fet[encodedCatColumnNames[i]]=c.scores_[i]
top_fet = sorted(top_fet.items(), key=lambda x: x[1],reverse=True)
top_fet = dict(top_fet)
# for key, value in top_fet.items():
# print('{:<40}|{:>20}'.format(key, value))
aa = pd.DataFrame(zip(*sorted(zip(numcols.scores_, encodedCatColumnNames), reverse=True))).T
aa.columns = ['score', 'name']
aa.head(10)
| score | name | |
|---|---|---|
| 0 | 2739.130 | week_6 |
| 1 | 2130.435 | weekday_3 |
| 2 | 1978.261 | Date_2021-02-13 |
| 3 | 1826.087 | winnerId_235 |
| 4 | 1826.087 | event_Bangladesh v West Indies |
| 5 | 1673.913 | selectionName_West Indies |
| 6 | 1673.913 | horse_235 |
| 7 | 1217.391 | week_50 |
| 8 | 1065.217 | week_5 |
| 9 | 760.870 | Date_2020-12-10 |
# numcols.scores_[:10]
# encodedCatColumnNames[:10]
plt.figure(figsize=(7,10))
sns.barplot(x=aa['score'].iloc[:50],y=aa['name'][:50], color='b')
plt.title('Top 50 Categorical Features')
plt.show()
X_train_merged = pd.concat([X_train_numerical_std,X_train_categorical_encoded], axis=1)
X_test_merged = pd.concat([X_test_numerical_std,X_test_categorical_encoded], axis=1)
X_train_merged.head()
| stake | stake_log | stake_boxcox | betRate | betRate_log | betRate_boxcox | marketId | averagePriceMatched | averagePriceMatched_log | averagePriceMatched_boxcox | type_BACK | type_LAY | horse_235 | horse_448 | horse_1117 | horse_1703 | horse_2426 | horse_2685 | horse_7407 | horse_7461 | horse_7659 | horse_9162 | horse_9163 | horse_10501 | horse_10761 | horse_10774 | horse_10779 | horse_13360 | horse_14072 | horse_16606 | horse_28191 | horse_28214 | horse_28220 | horse_28223 | horse_37302 | horse_37303 | horse_41433 | horse_44503 | horse_44504 | horse_44507 | horse_44508 | horse_44518 | horse_44519 | horse_44521 | horse_44526 | horse_44785 | horse_44787 | horse_44790 | horse_44793 | horse_44794 | horse_44795 | horse_44797 | horse_44798 | horse_44800 | horse_46726 | horse_47972 | horse_47973 | horse_47998 | horse_47999 | horse_48043 | horse_48044 | horse_48224 | horse_48351 | horse_48451 | horse_48461 | horse_48470 | horse_48756 | horse_48759 | horse_48783 | horse_48785 | horse_48786 | horse_48787 | horse_48793 | horse_48799 | horse_50347 | horse_50349 | horse_55190 | horse_55223 | horse_55243 | horse_55264 | horse_55270 | horse_55271 | horse_56036 | horse_56299 | horse_56301 | horse_56323 | horse_56343 | horse_56363 | horse_56764 | horse_56966 | horse_56967 | horse_58805 | horse_58943 | horse_59044 | horse_60294 | horse_60295 | horse_60297 | horse_60303 | horse_60310 | horse_60443 | horse_62683 | horse_63347 | horse_64374 | horse_64964 | horse_65352 | horse_65778 | horse_66183 | horse_67143 | horse_69718 | horse_69720 | horse_70385 | horse_70468 | horse_77586 | horse_78864 | horse_79323 | horse_79343 | horse_84649 | horse_86359 | horse_113123 | horse_113125 | horse_113187 | horse_113191 | horse_113239 | horse_121724 | horse_127991 | horse_191604 | horse_191607 | horse_198136 | horse_198138 | horse_199184 | horse_199545 | horse_201327 | horse_208035 | horse_214865 | horse_215817 | horse_215821 | horse_215829 | horse_247969 | horse_259394 | horse_269792 | horse_298233 | horse_309111 | horse_309687 | horse_309689 | horse_347774 | horse_350594 | horse_361329 | horse_361706 | horse_419126 | horse_476499 | horse_482032 | horse_489720 | horse_495321 | horse_498560 | horse_501200 | horse_505726 | horse_522046 | horse_522049 | horse_522054 | horse_571273 | horse_674742 | horse_676464 | horse_676465 | horse_676467 | horse_924268 | horse_965417 | horse_968185 | horse_1029663 | horse_1088499 | horse_1205121 | horse_1205126 | horse_1221385 | horse_1221386 | horse_1222344 | horse_1222345 | horse_1222346 | horse_1222347 | horse_1254317 | horse_1485567 | horse_1485568 | horse_1557297 | horse_2009654 | horse_2013140 | horse_2047448 | horse_2080735 | horse_2081063 | horse_2249229 | horse_2250259 | horse_2250353 | horse_2255452 | horse_2257536 | horse_2263603 | horse_2263634 | horse_2312313 | horse_2312315 | horse_2469649 | horse_2487036 | horse_2506293 | horse_2542449 | horse_2810072 | horse_3158851 | horse_3186303 | horse_3237590 | horse_3258153 | horse_3809606 | horse_3954225 | horse_4294272 | horse_4294273 | horse_4297012 | horse_4729711 | horse_4822931 | horse_4855758 | horse_4859354 | horse_4943786 | horse_5045297 | horse_5071877 | horse_5168454 | horse_5304142 | horse_5340398 | horse_5626816 | horse_5774350 | horse_5851482 | horse_5851483 | horse_5875376 | horse_6516913 | horse_6555433 | horse_6847357 | horse_7414058 | horse_7418999 | horse_7445660 | horse_7594131 | horse_7640637 | horse_7671296 | horse_7797904 | horse_7928242 | horse_8173434 | horse_8196374 | horse_8226987 | horse_8243874 | horse_8257797 | horse_8258569 | horse_8326752 | horse_8443097 | horse_8444055 | horse_8587663 | horse_8698678 | horse_8700174 | horse_8750569 | horse_8776882 | horse_8781581 | ... | city_Mohali | city_Mumbai | city_Mysore | city_Najafgarh | city_New Delhi | city_Noida | city_North Charleston | city_Panaji | city_Panchkula | city_Pimpri | city_Powai | city_Pune | city_Ranchi | city_Ras Al Khaimah City | city_Reading | city_Rohtak | city_Rājkot | city_Saint Helier | city_Saint Peter Port | city_Shāhāda | city_Singapore | city_Solāpur | city_Songadh | city_Sonīpat | city_Surat | city_Sūjāngarh | city_Tarn Tāran | city_Thrissur | city_Thāne | city_Tumkūr | city_Udaipur | city_Ullal | city_Vagator | city_Valsād | city_Virār | city_Visakhapatnam | city_Visnagar | city_Wardha | city_Yamunānagar | city_Ābu Road | region_Andhra Pradesh | region_Chandigarh | region_Delhi | region_Dubai | region_England | region_Goa | region_Gujarat | region_Haryana | region_Himachal Pradesh | region_Jharkhand | region_Karnataka | region_Kerala | region_Maharashtra | region_Manipur | region_North Brabant | region_Punjab | region_Rajasthan | region_Raʼs al Khaymah | region_Scotland | region_Singapore | region_South Carolina | region_St Helier | region_St Peter Port | region_Tamil Nadu | region_Telangana | region_Texas | region_Uttar Pradesh | region_Uttarakhand | region_Victoria | region_West Bengal | country_AE | country_AU | country_GB | country_GG | country_IN | country_JE | country_NL | country_SG | country_US | org_AS10029 SHYAM SPECTRA PVT LTD | org_AS11427 Charter Communications Inc | org_AS1221 Telstra Corporation Ltd | org_AS12576 EE Limited | org_AS131269 ACTFIBERNET Pvt Ltd | org_AS132116 Ani Network Pvt Ltd | org_AS13213 UK-2 Limited | org_AS132296 Seven Star Digital Network Private Limited | org_AS132453 TRIPLE PLAY BROADBAND PRIVATE LIMITED | org_AS132497 DIGITAL NETWORK ASSOCIATES PRIVATE LIMITED | org_AS132771 Fibre Air Services Private Limited | org_AS132996 Threesa Infoway Pvt.Ltd. | org_AS133243 Weblink Infoways Private Limited | org_AS133275 Gigantic Infotel Pvt Ltd | org_AS133296 Web Werks India Pvt. Ltd. | org_AS133311 Maxtech Broadband Pvt Ltd | org_AS133648 MNR Broadband Services Pvt. Ltd. | org_AS133661 Netplus Broadband Services Private Limited | org_AS133696 Fastway Transmission Private Limited | org_AS133715 YPT Entertainment House Pvt Ltd | org_AS133720 SOFT CALL CUST-O-CARE PRIVATE LIMITED | org_AS133982 Excitel Broadband Private Limited | org_AS134014 NET 4 U SERVICES PVT LTD | org_AS134022 Genstar Network Solutions Pvt Ltd. | org_AS134040 Vayutel Technology Services Private Limited | org_AS134259 Skyne | org_AS134312 Digi Win Infotainment Mumbai Pvt Ltd | org_AS134341 jdm broadband services pvt ltd | org_AS134674 TATA SKY BROADBAND PRIVATE LIMITED | org_AS134858 iForce Networks | org_AS134884 ARICHWAL IT SERVICES PRIVATE LIMITED | org_AS134886 Super Sonic Broadband Pvt Ltd | org_AS134937 Speed Communicaion | org_AS135239 Sonali Internet Services Pvt Ltd | org_AS135705 Nas Broadband Pvt Ltd | org_AS135718 DISHAWAVES INFONET PVT. LTD | org_AS135738 Adn Broadband | org_AS135776 Inxssinfo mkrt ser pvt ltd. | org_AS135817 ESTO MEDIA PRIVATE LIMITED | org_AS135836 Durga Broadband Pvt Ltd | org_AS135851 Excogitate Technologies Pvt Ltd | org_AS135854 Ruhban Telecommunication Private Limited | org_AS136334 Vortex Netsol Private Limited | org_AS136375 Chl Technology | org_AS136946 Weebo networks Pvt Ltd | org_AS137134 Triple Play Broadband Services India Pvt Ltd | org_AS137627 Nextel Communications India Pvt Ltd | org_AS138272 Hi5 Multimedia Services Pvt Ltd | org_AS138296 Juweriyah Networks Private Limited | org_AS14061 DigitalOcean, LLC | org_AS141257 Om Computer World | org_AS141271 Kay Powernet Services Pvt Ltd | org_AS15169 Google LLC | org_AS15802 Emirates Integrated Telecommunications Company PJSC | org_AS16509 Amazon.com, Inc. | org_AS17488 Hathway IP Over Cable Internet | org_AS17665 AS Number of Indusind Media and communication Ltd. | org_AS17747 SITI NETWORKS LIMITED | org_AS17762 Tata Teleservices Maharashtra Ltd | org_AS17917 Quadrant Televentures Limited | org_AS18196 Seven Star Internet Service Provider | org_AS18207 YOU Broadband & Cable India Ltd. | org_AS18209 Atria Convergence Technologies pvt ltd | org_AS199524 G-Core Labs S.A. | org_AS20057 AT&T Mobility LLC | org_AS21859 Zenlayer Inc | org_AS24186 RailTel Corporation of India Ltd | org_AS24554 Fivenetwork Solution India Pvt Ltd Internet | org_AS24560 Bharti Airtel Ltd., Telemedia Services | org_AS25135 Vodafone Limited | org_AS2856 British Telecommunications PLC | org_AS36351 SoftLayer Technologies Inc. | org_AS38207 Rajesh Multi Channel Pvt Ltd. | org_AS38266 Vodafone India Ltd. | org_AS38457 Honesty Net Solution (I) Pvt Ltd | org_AS41564 Packet Exchange Limited | org_AS45184 Den Digital Entertainment Pvt. Ltd. AS ISP india | org_AS45194 Syscon Infoway Pvt. Ltd. | org_AS45271 Idea Cellular Limited | org_AS45528 Tikona Infinet Ltd. | org_AS45609 Bharti Airtel Ltd. AS for GPRS Service | org_AS45769 D-Vois Broadband Pvt Ltd | org_AS45916 Gujarat Telelink Pvt Ltd | org_AS5384 Emirates Telecommunications Corporation | org_AS55352 Microscan Computers Private Limited | org_AS55832 HOME SYSTEMS PVT.LTD | org_AS55836 Reliance Jio Infocomm Limited | org_AS55862 Wan & Lan Internet Pvt Ltd | org_AS55947 Bangalore Broadband Network Pvt Ltd | org_AS58405 UNITED TELECOMS LIMITED | org_AS58678 Intech Online Private Limited | org_AS58762 candor infosolution Pvt Ltd | org_AS59185 NETRUN TECHNOLOGIES PVT LTD | org_AS62240 Clouvider Limited | org_AS8680 Sure (Guernsey) Limited | org_AS8681 JT (Jersey) Limited | org_AS9009 M247 Ltd | org_AS9829 National Internet Backbone | timezone_America/Chicago | timezone_America/New_York | timezone_Asia/Dubai | timezone_Asia/Kolkata | timezone_Asia/Singapore | timezone_Australia/Melbourne | timezone_Europe/Amsterdam | timezone_Europe/Guernsey | timezone_Europe/Jersey | timezone_Europe/London | Date_2020-11-02 | Date_2020-11-03 | Date_2020-11-05 | Date_2020-12-01 | Date_2020-12-07 | Date_2020-12-08 | Date_2020-12-09 | Date_2020-12-10 | Date_2021-01-22 | Date_2021-01-28 | Date_2021-01-30 | Date_2021-02-04 | Date_2021-02-05 | Date_2021-02-06 | Date_2021-02-08 | Date_2021-02-12 | Date_2021-02-13 | Date_2021-02-14 | Date_2021-02-18 | Date_2021-02-19 | Date_2021-02-20 | Date_2021-02-21 | Date_2021-02-22 | hour_0 | hour_1 | hour_2 | hour_3 | hour_4 | hour_5 | hour_6 | hour_7 | hour_8 | hour_9 | hour_10 | hour_11 | hour_12 | hour_13 | hour_14 | hour_15 | hour_16 | hour_17 | hour_18 | hour_19 | hour_20 | hour_21 | hour_22 | hour_23 | week_3 | week_4 | week_5 | week_6 | week_7 | week_8 | week_45 | week_49 | week_50 | weekday_0 | weekday_1 | weekday_2 | weekday_3 | weekday_4 | weekday_5 | weekday_6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.202 | -0.682 | -0.659 | -0.106 | -0.121 | 0.157 | 0.756 | -0.106 | -0.135 | 0.130 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| 1 | -0.202 | -0.725 | -0.706 | -0.028 | 0.598 | 1.246 | 0.061 | -0.029 | 0.594 | 1.244 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 |
| 2 | -0.202 | -0.774 | -0.759 | -0.151 | -0.701 | -1.366 | 0.826 | -0.148 | -0.699 | -1.362 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 |
| 3 | -0.094 | 0.796 | 0.823 | -0.098 | -0.041 | 0.313 | 0.745 | -0.097 | -0.042 | 0.314 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| 4 | -0.192 | -0.110 | -0.056 | -0.062 | 0.316 | 0.895 | -0.098 | -0.062 | 0.313 | 0.894 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
5 rows × 1702 columns
X_test_merged.head()
| stake | stake_log | stake_boxcox | betRate | betRate_log | betRate_boxcox | marketId | averagePriceMatched | averagePriceMatched_log | averagePriceMatched_boxcox | type_BACK | type_LAY | horse_235 | horse_448 | horse_1117 | horse_1703 | horse_2426 | horse_2685 | horse_7407 | horse_7461 | horse_7659 | horse_9162 | horse_9163 | horse_10501 | horse_10761 | horse_10774 | horse_10779 | horse_13360 | horse_14072 | horse_16606 | horse_28191 | horse_28214 | horse_28220 | horse_28223 | horse_37302 | horse_37303 | horse_41433 | horse_44503 | horse_44504 | horse_44507 | horse_44508 | horse_44518 | horse_44519 | horse_44521 | horse_44526 | horse_44785 | horse_44787 | horse_44790 | horse_44793 | horse_44794 | horse_44795 | horse_44797 | horse_44798 | horse_44800 | horse_46726 | horse_47972 | horse_47973 | horse_47998 | horse_47999 | horse_48043 | horse_48044 | horse_48224 | horse_48351 | horse_48451 | horse_48461 | horse_48470 | horse_48756 | horse_48759 | horse_48783 | horse_48785 | horse_48786 | horse_48787 | horse_48793 | horse_48799 | horse_50347 | horse_50349 | horse_55190 | horse_55223 | horse_55243 | horse_55264 | horse_55270 | horse_55271 | horse_56036 | horse_56299 | horse_56301 | horse_56323 | horse_56343 | horse_56363 | horse_56764 | horse_56966 | horse_56967 | horse_58805 | horse_58943 | horse_59044 | horse_60294 | horse_60295 | horse_60297 | horse_60303 | horse_60310 | horse_60443 | horse_62683 | horse_63347 | horse_64374 | horse_64964 | horse_65352 | horse_65778 | horse_66183 | horse_67143 | horse_69718 | horse_69720 | horse_70385 | horse_70468 | horse_77586 | horse_78864 | horse_79323 | horse_79343 | horse_84649 | horse_86359 | horse_113123 | horse_113125 | horse_113187 | horse_113191 | horse_113239 | horse_121724 | horse_127991 | horse_191604 | horse_191607 | horse_198136 | horse_198138 | horse_199184 | horse_199545 | horse_201327 | horse_208035 | horse_214865 | horse_215817 | horse_215821 | horse_215829 | horse_247969 | horse_259394 | horse_269792 | horse_298233 | horse_309111 | horse_309687 | horse_309689 | horse_347774 | horse_350594 | horse_361329 | horse_361706 | horse_419126 | horse_476499 | horse_482032 | horse_489720 | horse_495321 | horse_498560 | horse_501200 | horse_505726 | horse_522046 | horse_522049 | horse_522054 | horse_571273 | horse_674742 | horse_676464 | horse_676465 | horse_676467 | horse_924268 | horse_965417 | horse_968185 | horse_1029663 | horse_1088499 | horse_1205121 | horse_1205126 | horse_1221385 | horse_1221386 | horse_1222344 | horse_1222345 | horse_1222346 | horse_1222347 | horse_1254317 | horse_1485567 | horse_1485568 | horse_1557297 | horse_2009654 | horse_2013140 | horse_2047448 | horse_2080735 | horse_2081063 | horse_2249229 | horse_2250259 | horse_2250353 | horse_2255452 | horse_2257536 | horse_2263603 | horse_2263634 | horse_2312313 | horse_2312315 | horse_2469649 | horse_2487036 | horse_2506293 | horse_2542449 | horse_2810072 | horse_3158851 | horse_3186303 | horse_3237590 | horse_3258153 | horse_3809606 | horse_3954225 | horse_4294272 | horse_4294273 | horse_4297012 | horse_4729711 | horse_4822931 | horse_4855758 | horse_4859354 | horse_4943786 | horse_5045297 | horse_5071877 | horse_5168454 | horse_5304142 | horse_5340398 | horse_5626816 | horse_5774350 | horse_5851482 | horse_5851483 | horse_5875376 | horse_6516913 | horse_6555433 | horse_6847357 | horse_7414058 | horse_7418999 | horse_7445660 | horse_7594131 | horse_7640637 | horse_7671296 | horse_7797904 | horse_7928242 | horse_8173434 | horse_8196374 | horse_8226987 | horse_8243874 | horse_8257797 | horse_8258569 | horse_8326752 | horse_8443097 | horse_8444055 | horse_8587663 | horse_8698678 | horse_8700174 | horse_8750569 | horse_8776882 | horse_8781581 | ... | city_Mohali | city_Mumbai | city_Mysore | city_Najafgarh | city_New Delhi | city_Noida | city_North Charleston | city_Panaji | city_Panchkula | city_Pimpri | city_Powai | city_Pune | city_Ranchi | city_Ras Al Khaimah City | city_Reading | city_Rohtak | city_Rājkot | city_Saint Helier | city_Saint Peter Port | city_Shāhāda | city_Singapore | city_Solāpur | city_Songadh | city_Sonīpat | city_Surat | city_Sūjāngarh | city_Tarn Tāran | city_Thrissur | city_Thāne | city_Tumkūr | city_Udaipur | city_Ullal | city_Vagator | city_Valsād | city_Virār | city_Visakhapatnam | city_Visnagar | city_Wardha | city_Yamunānagar | city_Ābu Road | region_Andhra Pradesh | region_Chandigarh | region_Delhi | region_Dubai | region_England | region_Goa | region_Gujarat | region_Haryana | region_Himachal Pradesh | region_Jharkhand | region_Karnataka | region_Kerala | region_Maharashtra | region_Manipur | region_North Brabant | region_Punjab | region_Rajasthan | region_Raʼs al Khaymah | region_Scotland | region_Singapore | region_South Carolina | region_St Helier | region_St Peter Port | region_Tamil Nadu | region_Telangana | region_Texas | region_Uttar Pradesh | region_Uttarakhand | region_Victoria | region_West Bengal | country_AE | country_AU | country_GB | country_GG | country_IN | country_JE | country_NL | country_SG | country_US | org_AS10029 SHYAM SPECTRA PVT LTD | org_AS11427 Charter Communications Inc | org_AS1221 Telstra Corporation Ltd | org_AS12576 EE Limited | org_AS131269 ACTFIBERNET Pvt Ltd | org_AS132116 Ani Network Pvt Ltd | org_AS13213 UK-2 Limited | org_AS132296 Seven Star Digital Network Private Limited | org_AS132453 TRIPLE PLAY BROADBAND PRIVATE LIMITED | org_AS132497 DIGITAL NETWORK ASSOCIATES PRIVATE LIMITED | org_AS132771 Fibre Air Services Private Limited | org_AS132996 Threesa Infoway Pvt.Ltd. | org_AS133243 Weblink Infoways Private Limited | org_AS133275 Gigantic Infotel Pvt Ltd | org_AS133296 Web Werks India Pvt. Ltd. | org_AS133311 Maxtech Broadband Pvt Ltd | org_AS133648 MNR Broadband Services Pvt. Ltd. | org_AS133661 Netplus Broadband Services Private Limited | org_AS133696 Fastway Transmission Private Limited | org_AS133715 YPT Entertainment House Pvt Ltd | org_AS133720 SOFT CALL CUST-O-CARE PRIVATE LIMITED | org_AS133982 Excitel Broadband Private Limited | org_AS134014 NET 4 U SERVICES PVT LTD | org_AS134022 Genstar Network Solutions Pvt Ltd. | org_AS134040 Vayutel Technology Services Private Limited | org_AS134259 Skyne | org_AS134312 Digi Win Infotainment Mumbai Pvt Ltd | org_AS134341 jdm broadband services pvt ltd | org_AS134674 TATA SKY BROADBAND PRIVATE LIMITED | org_AS134858 iForce Networks | org_AS134884 ARICHWAL IT SERVICES PRIVATE LIMITED | org_AS134886 Super Sonic Broadband Pvt Ltd | org_AS134937 Speed Communicaion | org_AS135239 Sonali Internet Services Pvt Ltd | org_AS135705 Nas Broadband Pvt Ltd | org_AS135718 DISHAWAVES INFONET PVT. LTD | org_AS135738 Adn Broadband | org_AS135776 Inxssinfo mkrt ser pvt ltd. | org_AS135817 ESTO MEDIA PRIVATE LIMITED | org_AS135836 Durga Broadband Pvt Ltd | org_AS135851 Excogitate Technologies Pvt Ltd | org_AS135854 Ruhban Telecommunication Private Limited | org_AS136334 Vortex Netsol Private Limited | org_AS136375 Chl Technology | org_AS136946 Weebo networks Pvt Ltd | org_AS137134 Triple Play Broadband Services India Pvt Ltd | org_AS137627 Nextel Communications India Pvt Ltd | org_AS138272 Hi5 Multimedia Services Pvt Ltd | org_AS138296 Juweriyah Networks Private Limited | org_AS14061 DigitalOcean, LLC | org_AS141257 Om Computer World | org_AS141271 Kay Powernet Services Pvt Ltd | org_AS15169 Google LLC | org_AS15802 Emirates Integrated Telecommunications Company PJSC | org_AS16509 Amazon.com, Inc. | org_AS17488 Hathway IP Over Cable Internet | org_AS17665 AS Number of Indusind Media and communication Ltd. | org_AS17747 SITI NETWORKS LIMITED | org_AS17762 Tata Teleservices Maharashtra Ltd | org_AS17917 Quadrant Televentures Limited | org_AS18196 Seven Star Internet Service Provider | org_AS18207 YOU Broadband & Cable India Ltd. | org_AS18209 Atria Convergence Technologies pvt ltd | org_AS199524 G-Core Labs S.A. | org_AS20057 AT&T Mobility LLC | org_AS21859 Zenlayer Inc | org_AS24186 RailTel Corporation of India Ltd | org_AS24554 Fivenetwork Solution India Pvt Ltd Internet | org_AS24560 Bharti Airtel Ltd., Telemedia Services | org_AS25135 Vodafone Limited | org_AS2856 British Telecommunications PLC | org_AS36351 SoftLayer Technologies Inc. | org_AS38207 Rajesh Multi Channel Pvt Ltd. | org_AS38266 Vodafone India Ltd. | org_AS38457 Honesty Net Solution (I) Pvt Ltd | org_AS41564 Packet Exchange Limited | org_AS45184 Den Digital Entertainment Pvt. Ltd. AS ISP india | org_AS45194 Syscon Infoway Pvt. Ltd. | org_AS45271 Idea Cellular Limited | org_AS45528 Tikona Infinet Ltd. | org_AS45609 Bharti Airtel Ltd. AS for GPRS Service | org_AS45769 D-Vois Broadband Pvt Ltd | org_AS45916 Gujarat Telelink Pvt Ltd | org_AS5384 Emirates Telecommunications Corporation | org_AS55352 Microscan Computers Private Limited | org_AS55832 HOME SYSTEMS PVT.LTD | org_AS55836 Reliance Jio Infocomm Limited | org_AS55862 Wan & Lan Internet Pvt Ltd | org_AS55947 Bangalore Broadband Network Pvt Ltd | org_AS58405 UNITED TELECOMS LIMITED | org_AS58678 Intech Online Private Limited | org_AS58762 candor infosolution Pvt Ltd | org_AS59185 NETRUN TECHNOLOGIES PVT LTD | org_AS62240 Clouvider Limited | org_AS8680 Sure (Guernsey) Limited | org_AS8681 JT (Jersey) Limited | org_AS9009 M247 Ltd | org_AS9829 National Internet Backbone | timezone_America/Chicago | timezone_America/New_York | timezone_Asia/Dubai | timezone_Asia/Kolkata | timezone_Asia/Singapore | timezone_Australia/Melbourne | timezone_Europe/Amsterdam | timezone_Europe/Guernsey | timezone_Europe/Jersey | timezone_Europe/London | Date_2020-11-02 | Date_2020-11-03 | Date_2020-11-05 | Date_2020-12-01 | Date_2020-12-07 | Date_2020-12-08 | Date_2020-12-09 | Date_2020-12-10 | Date_2021-01-22 | Date_2021-01-28 | Date_2021-01-30 | Date_2021-02-04 | Date_2021-02-05 | Date_2021-02-06 | Date_2021-02-08 | Date_2021-02-12 | Date_2021-02-13 | Date_2021-02-14 | Date_2021-02-18 | Date_2021-02-19 | Date_2021-02-20 | Date_2021-02-21 | Date_2021-02-22 | hour_0 | hour_1 | hour_2 | hour_3 | hour_4 | hour_5 | hour_6 | hour_7 | hour_8 | hour_9 | hour_10 | hour_11 | hour_12 | hour_13 | hour_14 | hour_15 | hour_16 | hour_17 | hour_18 | hour_19 | hour_20 | hour_21 | hour_22 | hour_23 | week_3 | week_4 | week_5 | week_6 | week_7 | week_8 | week_45 | week_49 | week_50 | weekday_0 | weekday_1 | weekday_2 | weekday_3 | weekday_4 | weekday_5 | weekday_6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.202 | -0.682 | -0.659 | -0.153 | -0.738 | -1.490 | 0.745 | -0.150 | -0.735 | -1.486 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| 1 | 0.102 | 1.218 | 1.202 | -0.071 | 0.238 | 0.783 | -1.841 | -0.068 | 0.254 | 0.810 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 |
| 2 | -0.205 | -1.886 | -2.046 | 6.388 | 6.999 | 2.527 | 0.702 | 6.215 | 6.964 | 2.522 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| 3 | -0.204 | -1.127 | -1.154 | -0.152 | -0.719 | -1.427 | 0.622 | -0.149 | -0.717 | -1.423 | 0.000 | 1.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 4 | -0.202 | -0.682 | -0.659 | -0.051 | 0.411 | 1.023 | 0.756 | -0.053 | 0.398 | 1.009 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | ... | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
5 rows × 1702 columns
print(X_train_merged.shape)
print(X_test_merged.shape)
(7046, 1702) (3020, 1702)
%%time
from sklearn.manifold import TSNE
tsne2d = TSNE(
n_components=2,
perplexity=30.0, # Default
init='random', # pca
random_state=101,
method='barnes_hut',
n_iter=1000,
verbose=2,
angle=0.5
).fit_transform(X_train_merged[0:2000])
[t-SNE] Computing 91 nearest neighbors... [t-SNE] Indexed 2000 samples in 0.888s... [t-SNE] Computed neighbors for 2000 samples in 16.003s... [t-SNE] Computed conditional probabilities for sample 1000 / 2000 [t-SNE] Computed conditional probabilities for sample 2000 / 2000 [t-SNE] Mean sigma: 1.692283 [t-SNE] Computed conditional probabilities in 0.098s [t-SNE] Iteration 50: error = 77.9813309, gradient norm = 0.0823796 (50 iterations in 1.142s) [t-SNE] Iteration 100: error = 72.8528671, gradient norm = 0.0179570 (50 iterations in 0.819s) [t-SNE] Iteration 150: error = 72.4015961, gradient norm = 0.0038608 (50 iterations in 0.804s) [t-SNE] Iteration 200: error = 72.2978973, gradient norm = 0.0013333 (50 iterations in 0.988s) [t-SNE] Iteration 250: error = 72.2682877, gradient norm = 0.0015928 (50 iterations in 0.788s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 72.268288 [t-SNE] Iteration 300: error = 1.2277400, gradient norm = 0.0010195 (50 iterations in 0.679s) [t-SNE] Iteration 350: error = 1.0007497, gradient norm = 0.0004100 (50 iterations in 0.725s) [t-SNE] Iteration 400: error = 0.9254277, gradient norm = 0.0002582 (50 iterations in 0.682s) [t-SNE] Iteration 450: error = 0.8879159, gradient norm = 0.0002141 (50 iterations in 0.670s) [t-SNE] Iteration 500: error = 0.8687216, gradient norm = 0.0001629 (50 iterations in 0.710s) [t-SNE] Iteration 550: error = 0.8574973, gradient norm = 0.0001472 (50 iterations in 0.679s) [t-SNE] Iteration 600: error = 0.8493732, gradient norm = 0.0001436 (50 iterations in 0.735s) [t-SNE] Iteration 650: error = 0.8437960, gradient norm = 0.0001206 (50 iterations in 0.844s) [t-SNE] Iteration 700: error = 0.8393996, gradient norm = 0.0001041 (50 iterations in 0.781s) [t-SNE] Iteration 750: error = 0.8358116, gradient norm = 0.0001066 (50 iterations in 0.748s) [t-SNE] Iteration 800: error = 0.8326046, gradient norm = 0.0001007 (50 iterations in 0.782s) [t-SNE] Iteration 850: error = 0.8298184, gradient norm = 0.0000992 (50 iterations in 0.769s) [t-SNE] Iteration 900: error = 0.8277671, gradient norm = 0.0000758 (50 iterations in 0.718s) [t-SNE] Iteration 950: error = 0.8254353, gradient norm = 0.0000853 (50 iterations in 0.693s) [t-SNE] Iteration 1000: error = 0.8238515, gradient norm = 0.0000854 (50 iterations in 0.695s) [t-SNE] KL divergence after 1000 iterations: 0.823851 Wall time: 32.5 s
y_ = y_train[:2000].values
df = pd.DataFrame({'x':tsne2d[:,0], 'y':tsne2d[:,1] ,'label':y_})
# draw the plot in appropriate place in the grid
sns.lmplot(data=df, x='x', y='y', hue='label', fit_reg=False, height=6,palette="Set2",)
plt.title("perplexity : {} and max_iter : {}".format(30, 1000))
plt.show()
# This function plots the confusion matrices.
def plot_confusion_matrix(test_y, predict_y):
"""
plot_confusion_matrix(test_y, predict_y)
: plot confusion, precision, recall heatmap.
test_y : ground truth y values.
predict_y : predicted y values.
"""
C = confusion_matrix(test_y, predict_y)
A =(((C.T)/(C.sum(axis=1))).T)
B =(C/C.sum(axis=0))
plt.figure(figsize=(16,4))
labels = [0,1]
plt.subplot(1, 3, 1)
sns.heatmap(C, annot=True, fmt=".3f", xticklabels=labels, yticklabels=labels,cmap="Blues")
plt.xlabel('Predicted Class')
plt.ylabel('Original Class')
plt.title("Confusion matrix")
plt.subplot(1, 3, 2)
sns.heatmap(B, annot=True, fmt=".3f", xticklabels=labels, yticklabels=labels,cmap="Reds")
plt.xlabel('Predicted Class')
plt.ylabel('Original Class')
plt.title("Precision matrix")
plt.subplot(1, 3, 3)
sns.heatmap(A, annot=True, fmt=".3f", xticklabels=labels, yticklabels=labels,cmap="Greens")
plt.xlabel('Predicted Class')
plt.ylabel('Original Class')
plt.title("Recall matrix")
plt.show()
from sklearn.metrics.classification import accuracy_score, log_loss
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV , train_test_split
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split,KFold,cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score , f1_score , make_scorer
from sklearn.preprocessing import StandardScaler,OneHotEncoder , LabelEncoder ,normalize
from sklearn.feature_selection import SelectKBest,f_classif,chi2
from sklearn.metrics.classification import accuracy_score, log_loss
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import SGDClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score,accuracy_score,precision_score,recall_score,f1_score
from sklearn.metrics import confusion_matrix, roc_curve, auc
test_len = len(y_test)
predicted_y = np.zeros((test_len,2))
for i in range(test_len):
rand_probs = np.random.rand(1,2)
predicted_y[i] = ((rand_probs/sum(sum(rand_probs)))[0])
print("Log loss on Test Data using Random Model : ",log_loss(y_test, predicted_y, eps=1e-15))
print("Accuracy on Test Data using Random Model : ",accuracy_score(y_test, predicted_y[:,1].round()))
predicted_y =np.argmax(predicted_y, axis=1)
plot_confusion_matrix(y_test, predicted_y)
Log loss on Test Data using Random Model : 0.8827376998526446 Accuracy on Test Data using Random Model : 0.4996688741721854
alpha = [10 ** x for x in range(-5, 5)] # hyperparam for SGD classifier.
log_error_array=[]
for i in alpha:
clf = SGDClassifier(alpha=i, penalty='l2', loss='log', random_state=42,class_weight="balanced")
clf.fit(X_train_merged, y_train)
sig_clf = CalibratedClassifierCV(clf, method="sigmoid")
sig_clf.fit(X_train_merged, y_train)
predict_y = sig_clf.predict_proba(X_test_merged)
log_error_array.append(log_loss(y_test, predict_y, labels=clf.classes_, eps=1e-15))
print('For values of alpha = ', i, "The log loss is:",log_loss(y_test, predict_y, labels=clf.classes_, eps=1e-15))
print("\t\t Accuracy :", accuracy_score(y_test,predicted_y)*100)
For values of alpha = 1e-05 The log loss is: 0.007878108145857085 Accuracy : 49.966887417218544 For values of alpha = 0.0001 The log loss is: 0.008090953070734134 Accuracy : 49.966887417218544 For values of alpha = 0.001 The log loss is: 0.00882788112569219 Accuracy : 49.966887417218544 For values of alpha = 0.01 The log loss is: 0.006496588260556823 Accuracy : 49.966887417218544 For values of alpha = 0.1 The log loss is: 0.012453772987171213 Accuracy : 49.966887417218544 For values of alpha = 1 The log loss is: 0.02663188756281457 Accuracy : 49.966887417218544 For values of alpha = 10 The log loss is: 0.03598991431186033 Accuracy : 49.966887417218544 For values of alpha = 100 The log loss is: 0.03734409165258908 Accuracy : 49.966887417218544 For values of alpha = 1000 The log loss is: 0.03744146839307445 Accuracy : 49.966887417218544 For values of alpha = 10000 The log loss is: 0.037432052791705535 Accuracy : 49.966887417218544
fig, ax = plt.subplots()
ax.plot(alpha, log_error_array,c='g')
for i, txt in enumerate(np.round(log_error_array,3)):
ax.annotate((alpha[i],np.round(txt,3)), (alpha[i],log_error_array[i]))
plt.grid()
plt.title("Cross Validation Error for each alpha")
plt.xlabel("Alpha i's")
plt.ylabel("Error measure")
plt.rcParams["figure.figsize"] = [10,7]
plt.show()
best_alpha = np.argmin(log_error_array)
best_alpha = np.argmin(log_error_array)
clf = SGDClassifier(alpha=alpha[best_alpha], penalty='l2', loss='log', random_state=42)
clf.fit(X_train_merged, y_train)
sig_clf = CalibratedClassifierCV(clf, method="sigmoid")
sig_clf.fit(X_train_merged, y_train)
predict_y = sig_clf.predict_proba(X_train_merged)
print('For values of best alpha = ', alpha[best_alpha], "The train log loss is:",log_loss(y_train, predict_y, labels=clf.classes_, eps=1e-15))
predict_y = sig_clf.predict_proba(X_test_merged)
print('For values of best alpha = ', alpha[best_alpha], "The test log loss is:",log_loss(y_test, predict_y, labels=clf.classes_, eps=1e-15))
predicted_y =np.argmax(predict_y,axis=1)
print("Accuracy :", accuracy_score(y_test,predicted_y)*100)
plot_confusion_matrix(y_test, predicted_y)
For values of best alpha = 0.01 The train log loss is: 0.01594966582802052 For values of best alpha = 0.01 The test log loss is: 0.019633528535291423 Accuracy : 99.60264900662251
accuracy = {}
roc_r = {}
def train_model(model):
# Checking accuracy
model = model.fit(X_train_merged, y_train)
pred = model.predict(X_test_merged)
acc = accuracy_score(y_test, pred)*100
accuracy[model] = acc
print('accuracy_score',acc)
print('precision_score',precision_score(y_test, pred)*100)
print('recall_score',recall_score(y_test, pred)*100)
print('f1_score',f1_score(y_test, pred)*100)
roc_score = roc_auc_score(y_test, pred)*100
roc_r[model] = roc_score
print('roc_auc_score',roc_score)
# confusion matrix
print('confusion_matrix')
plot_confusion_matrix(y_test,pred)
fpr, tpr, threshold = roc_curve(y_test, pred)
roc_auc = auc(fpr, tpr)*100
plt.figure(figsize=(4,4))
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
lr = LogisticRegression(C=5.0,class_weight="balanced", max_iter= 10000)
train_model(lr)
accuracy_score 99.93377483443709 precision_score 100.0 recall_score 90.0 f1_score 94.73684210526316 roc_auc_score 95.0 confusion_matrix
dtc = DecisionTreeClassifier(class_weight="balanced")
train_model(dtc)
accuracy_score 99.93377483443709 precision_score 100.0 recall_score 90.0 f1_score 94.73684210526316 roc_auc_score 95.0 confusion_matrix
rfc = RandomForestClassifier(n_estimators=100,criterion='gini',class_weight="balanced")
train_model(rfc)
accuracy_score 99.70198675496688 precision_score 100.0 recall_score 55.00000000000001 f1_score 70.96774193548387 roc_auc_score 77.5 confusion_matrix
scale_pos_weight = np.floor(y_train.value_counts()[0]/y_train.value_counts()[1])
xgb = XGBClassifier(scale_pos_weight = scale_pos_weight)
train_model(xgb)
accuracy_score 99.93377483443709 precision_score 100.0 recall_score 90.0 f1_score 94.73684210526316 roc_auc_score 95.0 confusion_matrix
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
n_samples = len(y_train)
n_classes = 2
weight = n_samples / (n_classes * np.bincount(y_train))
inputDim = X_train_merged.shape[1]
class_weight = {0: np.ceil(weight[0]),
1: np.ceil(weight[1])}
class_weight
{0: 1.0, 1: 77.0}
epoch = 30
batch = 1000
# define the keras model
model = Sequential()
model.add(Dense(64, input_dim=inputDim, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
Model: "sequential_6" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_21 (Dense) (None, 64) 108992 _________________________________________________________________ dense_22 (Dense) (None, 32) 2080 _________________________________________________________________ dense_23 (Dense) (None, 8) 264 _________________________________________________________________ dense_24 (Dense) (None, 1) 9 ================================================================= Total params: 111,345 Trainable params: 111,345 Non-trainable params: 0 _________________________________________________________________
# fit the keras model on the dataset
history = model.fit(X_train_merged, y_train, epochs=epoch, batch_size=batch, class_weight=class_weight, verbose=1,
validation_data = (X_test_merged,y_test))
Train on 7046 samples, validate on 3020 samples Epoch 1/30 7046/7046 [==============================] - 1s 173us/step - loss: 1.0219 - accuracy: 0.1764 - val_loss: 0.6927 - val_accuracy: 0.7781 Epoch 2/30 7046/7046 [==============================] - 0s 70us/step - loss: 0.9400 - accuracy: 0.8737 - val_loss: 0.6423 - val_accuracy: 0.9500 Epoch 3/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.8311 - accuracy: 0.9654 - val_loss: 0.5396 - val_accuracy: 0.9838 Epoch 4/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.6687 - accuracy: 0.9926 - val_loss: 0.3895 - val_accuracy: 0.9954 Epoch 5/30 7046/7046 [==============================] - 1s 86us/step - loss: 0.4686 - accuracy: 0.9994 - val_loss: 0.2296 - val_accuracy: 0.9980 Epoch 6/30 7046/7046 [==============================] - 1s 71us/step - loss: 0.2705 - accuracy: 0.9997 - val_loss: 0.1082 - val_accuracy: 0.9990 Epoch 7/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.1369 - accuracy: 0.9999 - val_loss: 0.0472 - val_accuracy: 0.9990 Epoch 8/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0653 - accuracy: 1.0000 - val_loss: 0.0239 - val_accuracy: 0.9990 Epoch 9/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0326 - accuracy: 1.0000 - val_loss: 0.0140 - val_accuracy: 0.9990 Epoch 10/30 7046/7046 [==============================] - 0s 70us/step - loss: 0.0171 - accuracy: 1.0000 - val_loss: 0.0094 - val_accuracy: 0.9990 Epoch 11/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0111 - accuracy: 1.0000 - val_loss: 0.0074 - val_accuracy: 0.9993 Epoch 12/30 7046/7046 [==============================] - 0s 70us/step - loss: 0.0078 - accuracy: 1.0000 - val_loss: 0.0063 - val_accuracy: 0.9993 Epoch 13/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0057 - accuracy: 1.0000 - val_loss: 0.0056 - val_accuracy: 0.9993 Epoch 14/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0046 - accuracy: 1.0000 - val_loss: 0.0052 - val_accuracy: 0.9993 Epoch 15/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0038 - accuracy: 1.0000 - val_loss: 0.0049 - val_accuracy: 0.9993 Epoch 16/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0032 - accuracy: 1.0000 - val_loss: 0.0046 - val_accuracy: 0.9993 Epoch 17/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0027 - accuracy: 1.0000 - val_loss: 0.0046 - val_accuracy: 0.9993 Epoch 18/30 7046/7046 [==============================] - 0s 70us/step - loss: 0.0024 - accuracy: 1.0000 - val_loss: 0.0044 - val_accuracy: 0.9993 Epoch 19/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0021 - accuracy: 1.0000 - val_loss: 0.0043 - val_accuracy: 0.9993 Epoch 20/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0018 - accuracy: 1.0000 - val_loss: 0.0042 - val_accuracy: 0.9993 Epoch 21/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0016 - accuracy: 1.0000 - val_loss: 0.0042 - val_accuracy: 0.9993 Epoch 22/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0015 - accuracy: 1.0000 - val_loss: 0.0041 - val_accuracy: 0.9993 Epoch 23/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0014 - accuracy: 1.0000 - val_loss: 0.0040 - val_accuracy: 0.9993 Epoch 24/30 7046/7046 [==============================] - 0s 69us/step - loss: 0.0012 - accuracy: 1.0000 - val_loss: 0.0040 - val_accuracy: 0.9993 Epoch 25/30 7046/7046 [==============================] - 1s 86us/step - loss: 0.0011 - accuracy: 1.0000 - val_loss: 0.0039 - val_accuracy: 0.9993 Epoch 26/30 7046/7046 [==============================] - 1s 73us/step - loss: 9.9734e-04 - accuracy: 1.0000 - val_loss: 0.0039 - val_accuracy: 0.9993 Epoch 27/30 7046/7046 [==============================] - 1s 71us/step - loss: 9.1696e-04 - accuracy: 1.0000 - val_loss: 0.0039 - val_accuracy: 0.9993 Epoch 28/30 7046/7046 [==============================] - 1s 87us/step - loss: 8.4447e-04 - accuracy: 1.0000 - val_loss: 0.0039 - val_accuracy: 0.9993 Epoch 29/30 7046/7046 [==============================] - 1s 83us/step - loss: 7.8379e-04 - accuracy: 1.0000 - val_loss: 0.0039 - val_accuracy: 0.9993 Epoch 30/30 7046/7046 [==============================] - 1s 77us/step - loss: 7.3134e-04 - accuracy: 1.0000 - val_loss: 0.0039 - val_accuracy: 0.9993
# evaluate the keras model
_, accuracy = model.evaluate(X_train_merged, y_train)
print('Accuracy: %.2f' % (accuracy*100))
7046/7046 [==============================] - 1s 75us/step Accuracy: 100.00
# evaluate the keras model
_, accuracy = model.evaluate(X_test_merged, y_test)
print('Accuracy: %.2f' % (accuracy*100))
3020/3020 [==============================] - 0s 71us/step Accuracy: 99.93
# predictions = model.predict(X_test_merged)
predictions = model.predict_classes(X_test_merged)
y_classes = predictions
confusion_matrix(y_test.values, y_classes)
array([[3000, 0],
[ 2, 18]], dtype=int64)
plot_confusion_matrix(y_test.values, y_classes)
score = roc_auc_score(y_test.values, y_classes)
print("ROC Score : ", score)
ROC Score : 0.95
print('precision_score',precision_score(y_test.values, y_classes)*100)
print('recall_score',recall_score(y_test.values, y_classes)*100)
print('f1_score',f1_score(y_test.values, y_classes)*100)
precision_score 100.0 recall_score 90.0 f1_score 94.73684210526316
def loss_acc_plot(history=history):
plt.figure(figsize=(16, 5))
plt.subplot(1,2,1)
plt.plot(history.history['accuracy'], linestyle='--', marker='o')
plt.plot(history.history['val_accuracy'], linestyle='--', marker='o')
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='lower right')
plt.subplot(1,2,2)
plt.plot(history.history['loss'], linestyle='--', marker='o')
plt.plot(history.history['val_loss'], linestyle='--', marker='o')
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
loss_acc_plot(history)